% Publication list (BibTeX database). Citation keys are the numeric record ids
% of the exporting system and are kept verbatim so existing citations resolve.
% Conventions used below: one field per line, names as "Last, First",
% bare DOIs (no resolver prefix), page ranges with "--".

% NOTE(review): the month of entry 1470 was exported as "Apr-08-2021", which is
% ambiguous (April 8 vs. 4 August); it is left unchanged -- confirm against the
% publisher page and replace with a month macro.
@article{1470,
  author   = {Pappalardo, Luca and Rossi, Alessio and Natilli, Michela and Cintia, Paolo},
  editor   = {Constantinou, Anthony C.},
  title    = {Explaining the difference between men{\textquoteright}s and women{\textquoteright}s football},
  journal  = {PLOS ONE},
  volume   = {16},
  year     = {2021},
  month    = {Apr-08-2021},
  pages    = {e0255407},
  doi      = {10.1371/journal.pone.0255407},
  url      = {https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0255407},
  abstract = {Women{\textquoteright}s football is gaining supporters and practitioners worldwide, raising questions about what the differences are with men{\textquoteright}s football. While the two sports are often compared based on the players{\textquoteright} physical attributes, we analyze the spatio-temporal events during matches in the last World Cups to compare male and female teams based on their technical performance. We train an artificial intelligence model to recognize if a team is male or female based on variables that describe a match{\textquoteright}s playing intensity, accuracy, and performance quality. Our model accurately distinguishes between men{\textquoteright}s and women{\textquoteright}s football, revealing crucial technical differences, which we investigate through the extraction of explanations from the classifier{\textquoteright}s decisions. The differences between men{\textquoteright}s and women{\textquoteright}s football are rooted in play accuracy, the recovery time of ball possession, and the players{\textquoteright} performance quality. Our methodology may help journalists and fans understand what makes women{\textquoteright}s football a distinct sport and coaches design tactics tailored to female teams.},
}

@booklet{1425,
  author   = {Bonato, Pietro and Cintia, Paolo and Fabbri, Francesco and Fadda, Daniele and Giannotti, Fosca and Lopalco, Pier Luigi and Mazzilli, Sara and Nanni, Mirco and Pappalardo, Luca and Pedreschi, Dino and Penone, Francesco and Rinzivillo, S and Rossetti, Giulio and Savarese, Marcello and Tavoschi, Lara},
  title    = {Mobile phone data analytics against the {COVID-19} epidemics in {Italy}: flow diversity and local job markets during the national lockdown},
  year     = {2020},
  doi      = {10.32079/ISTI-TR-2020/005},
  url      = {https://arxiv.org/abs/2004.11278},
  abstract = {Understanding collective mobility patterns is crucial to plan the restart of production and economic activities, which are currently put in stand-by to fight the diffusion of the epidemics. In this report, we use mobile phone data to infer the movements of people between Italian provinces and municipalities, and we analyze the incoming, outcoming and internal mobility flows before and during the national lockdown (March 9th, 2020) and after the closure of non-necessary productive and economic activities (March 23th, 2020). The population flow across provinces and municipalities enable for the modelling of a risk index tailored for the mobility of each municipality or province. Such an index would be a useful indicator to drive counter-measures in reaction to a sudden reactivation of the epidemics. Mobile phone data, even when aggregated to preserve the privacy of individuals, are a useful data source to track the evolution in time of human mobility, hence allowing for monitoring the effectiveness of control measures such as physical distancing. We address the following analytical questions: How does the mobility structure of a territory change? Do incoming and outcoming flows become more predictable during the lockdown, and what are the differences between weekdays and weekends? Can we detect proper local job markets based on human mobility flows, to eventually shape the borders of a local outbreak?},
}

% NOTE(review): entry 1421 had no journal field; the name below is inferred from
% the ISSN 0169-023X and the "j.datak" DOI -- confirm. The ISSN was previously
% stored under "isbn".
@article{1421,
  author   = {Pratesi, Francesca and Gabrielli, Lorenzo and Cintia, Paolo and Monreale, Anna and Giannotti, Fosca},
  title    = {{PRIMULE}: Privacy risk mitigation for user profiles},
  journal  = {Data \& Knowledge Engineering},
  volume   = {125},
  year     = {2020},
  month    = jan,
  pages    = {101786},
  issn     = {0169-023X},
  doi      = {10.1016/j.datak.2019.101786},
  url      = {https://www.sciencedirect.com/science/article/pii/S0169023X18305342},
  abstract = {The availability of mobile phone data has encouraged the development of different data-driven tools, supporting social science studies and providing new data sources to the standard official statistics. However, this particular kind of data are subject to privacy concerns because they can enable the inference of personal and private information. In this paper, we address the privacy issues related to the sharing of user profiles, derived from mobile phone data, by proposing PRIMULE, a privacy risk mitigation strategy. Such a method relies on PRUDEnce (Pratesi et~al., 2018), a privacy risk assessment framework that provides a methodology for systematically identifying risky-users in a set of data. An extensive experimentation on real-world data shows the effectiveness of PRIMULE strategy in terms of both quality of mobile user profiles and utility of these profiles for analytical services such as the Sociometer (Furletti et~al., 2013), a data mining tool for city users classification.},
}

% Entry 1339 is an arXiv preprint: the identifier lives in eprint/archiveprefix
% instead of being embedded in a journal field.
@misc{1339,
  author        = {Cintia, Paolo and Fadda, Daniele and Giannotti, Fosca and Pappalardo, Luca and Rossetti, Giulio and Pedreschi, Dino and Rinzivillo, S and Bonato, Pietro and Fabbri, Francesco and Penone, Francesco and Savarese, Marcello and Checchi, Daniele and Chiaromonte, Francesca and Vineis, Paolo and Guzzetta, Giorgio and Riccardo, Flavia and Marziano, Valentina and Poletti, Piero and Trentini, Filippo and Bella, Antonio and Andrianou, Xanthi and Del Manso, Martina and Fabiani, Massimo and Bellino, Stefania and Boros, Stefano and Mateo Urdiales, Alberto and Vescio, Maria Fenicia and Brusaferro, Silvio and Rezza, Giovanni and Pezzotti, Patrizio and Ajelli, Marco and Merler, Stefano},
  title         = {The relationship between human mobility and viral transmissibility during the {COVID-19} epidemics in {Italy}},
  year          = {2020},
  eprint        = {2006.03141},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2006.03141},
  abstract      = {We describe in this report our studies to understand the relationship between human mobility and the spreading of COVID-19, as an aid to manage the restart of the social and economic activities after the lockdown and monitor the epidemics in the coming weeks and months. We compare the evolution (from January to May 2020) of the daily mobility flows in Italy, measured by means of nation-wide mobile phone data, and the evolution of transmissibility, measured by the net reproduction number, i.e., the mean number of secondary infections generated by one primary infector in the presence of control interventions and human behavioural adaptations. We find a striking relationship between the negative variation of mobility flows and the net reproduction number, in all Italian regions, between March 11th and March 18th, when the country entered the lockdown. This observation allows us to quantify the time needed to ``switch off'' the country mobility (one week) and the time required to bring the net reproduction number below 1 (one week). A reasonably simple regression model provides evidence that the net reproduction number is correlated with a region{\textquoteright}s incoming, outgoing and internal mobility. We also find a strong relationship between the number of days above the epidemic threshold before the mobility flows reduce significantly as an effect of lockdowns, and the total number of confirmed SARS-CoV-2 infections per 100k inhabitants, thus indirectly showing the effectiveness of the lockdown and the other non-pharmaceutical interventions in the containment of the contagion. Our study demonstrates the value of ``big'' mobility data to the monitoring of key epidemic indicators to inform choices as the epidemics unfolds in the coming months.},
}

@article{1302,
  author   = {Andrienko, Gennady and Andrienko, Natalia and Boldrini, Chiara and Caldarelli, Guido and Cintia, Paolo and Cresci, Stefano and Facchini, Angelo and Giannotti, Fosca and Gionis, Aristides and Guidotti, Riccardo and others},
  title    = {{(So) Big Data} and the transformation of the city},
  journal  = {International Journal of Data Science and Analytics},
  year     = {2020},
  doi      = {10.1007/s41060-020-00207-3},
  url      = {https://link.springer.com/article/10.1007/s41060-020-00207-3},
  abstract = {The exponential increase in the availability of large-scale mobility data has fueled the vision of smart cities that will transform our lives. The truth is that we have just scratched the surface of the research challenges that should be tackled in order to make this vision a reality. Consequently, there is an increasing interest among different research communities (ranging from civil engineering to computer science) and industrial stakeholders in building knowledge discovery pipelines over such data sources. At the same time, this widespread data availability also raises privacy issues that must be considered by both industrial and academic stakeholders. In this paper, we provide a wide perspective on the role that big data have in reshaping cities. The paper covers the main aspects of urban data analytics, focusing on privacy issues, algorithms, applications and services, and georeferenced data from social media. In discussing these aspects, we leverage, as concrete examples and case studies of urban data science tools, the results obtained in the {\textquotedblleft}City of Citizens{\textquotedblright} thematic area of the Horizon 2020 SoBigData initiative, which includes a virtual research environment with mobility datasets and urban analytics methods developed by several institutions around Europe. We conclude the paper outlining the main research challenges that urban data science has yet to address in order to help make the smart city vision a reality.},
}

@article{1294,
  author   = {Pappalardo, Luca and Cintia, Paolo and Ferragina, Paolo and Massucco, Emanuele and Pedreschi, Dino and Giannotti, Fosca},
  title    = {{PlayeRank}: data-driven performance evaluation and player ranking in soccer via a machine learning approach},
  journal  = {ACM Transactions on Intelligent Systems and Technology (TIST)},
  volume   = {10},
  number   = {5},
  year     = {2019},
  pages    = {1--27},
  doi      = {10.1145/3343172},
  url      = {https://dl.acm.org/doi/abs/10.1145/3343172},
  abstract = {The problem of evaluating the performance of soccer players is attracting the interest of many companies and the scientific community, thanks to the availability of massive data capturing all the events generated during a match (e.g., tackles, passes, shots, etc.). Unfortunately, there is no consolidated and widely accepted metric for measuring performance quality in all of its facets. In this article, we design and implement PlayeRank, a data-driven framework that offers a principled multi-dimensional and role-aware evaluation of the performance of soccer players. We build our framework by deploying a massive dataset of soccer-logs and consisting of millions of match events pertaining to four seasons of 18 prominent soccer competitions. By comparing PlayeRank to known algorithms for performance evaluation in soccer, and by exploiting a dataset of players{\textquoteright} evaluations made by professional soccer scouts, we show that PlayeRank significantly outperforms the competitors. We also explore the ratings produced by PlayeRank and discover interesting patterns about the nature of excellent performances and what distinguishes the top players from the others. At the end, we explore some applications of PlayeRank{\textemdash}i.e. searching players and player versatility{\textemdash}showing its flexibility and efficiency, which makes it worth to be used in the design of a scalable platform for soccer analytics.},
}

@article{1266,
  author   = {Pappalardo, Luca and Cintia, Paolo and Rossi, Alessio and Massucco, Emanuele and Ferragina, Paolo and Pedreschi, Dino and Giannotti, Fosca},
  title    = {A public data set of spatio-temporal match events in soccer competitions},
  journal  = {Scientific Data},
  volume   = {6},
  number   = {1},
  year     = {2019},
  pages    = {1--15},
  doi      = {10.1038/s41597-019-0247-7},
  url      = {https://www.nature.com/articles/s41597-019-0247-7},
  abstract = {Soccer analytics is attracting increasing interest in academia and industry, thanks to the availability of sensing technologies that provide high-fidelity data streams for every match. Unfortunately, these detailed data are owned by specialized companies and hence are rarely publicly available for scientific research. To fill this gap, this paper describes the largest open collection of soccer-logs ever released, containing all the spatio-temporal events (passes, shots, fouls, etc.) that occured during each match for an entire season of seven prominent soccer competitions. Each match event contains information about its position, time, outcome, player and characteristics. The nature of team sports like soccer, halfway between the abstraction of a game and the reality of complex social systems, combined with the unique size and composition of this dataset, provide an ideal ground for tackling a wide range of data science problems, including the measurement and evaluation of performance, both at individual and at collective level, and the determinants of success and failure.},
}

@article{1295,
  author   = {Rossi, Alessio and Perri, Enrico and Pappalardo, Luca and Cintia, Paolo and Iaia, F Marcello},
  title    = {Relationship between External and Internal Workloads in Elite Soccer Players: Comparison between Rate of Perceived Exertion and Training Load},
  journal  = {Applied Sciences},
  volume   = {9},
  number   = {23},
  year     = {2019},
  pages    = {5174},
  doi      = {10.3390/app9235174},
  url      = {https://www.mdpi.com/2076-3417/9/23/5174/htm},
  abstract = {The use of machine learning (ML) in soccer allows for the management of a large amount of data deriving from the monitoring of sessions and matches. Although the rate of perceived exertion (RPE), training load (S-RPE), and global position system (GPS) are standard methodologies used in team sports to assess the internal and external workload; how the external workload affects RPE and S-RPE remains still unclear. This study explores the relationship between both RPE and S-RPE and the training workload through ML. Data were recorded from 22 elite soccer players, in 160 training sessions and 35 matches during the 2015/2016 season, by using GPS tracking technology. A feature selection process was applied to understand which workload features influence RPE and S-RPE the most. Our results show that the training workloads performed in the previous week have a strong effect on perceived exertion and training load. On the other hand, the analysis of our predictions shows higher accuracy for medium RPE and S-RPE values compared with the extremes. These results provide further evidence of the usefulness of ML as a support to athletic trainers and coaches in understanding the relationship between training load and individual-response in team sports.},
}

@article{1086,
  author   = {Rossi, Alessio and Pappalardo, Luca and Cintia, Paolo and Iaia, F Marcello and Fern{\`a}ndez, Javier and Medina, Daniel},
  title    = {Effective injury forecasting in soccer with {GPS} training data and machine learning},
  journal  = {PLOS ONE},
  volume   = {13},
  number   = {7},
  year     = {2018},
  pages    = {e0201264},
  doi      = {10.1371/journal.pone.0201264},
  url      = {https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0201264},
  abstract = {Injuries have a great impact on professional soccer, due to their large influence on team performance and the considerable costs of rehabilitation for players. Existing studies in the literature provide just a preliminary understanding of which factors mostly affect injury risk, while an evaluation of the potential of statistical models in forecasting injuries is still missing. In this paper, we propose a multi-dimensional approach to injury forecasting in professional soccer that is based on GPS measurements and machine learning. By using GPS tracking technology, we collect data describing the training workload of players in a professional soccer club during a season. We then construct an injury forecaster and show that it is both accurate and interpretable by providing a set of case studies of interest to soccer practitioners. Our approach opens a novel perspective on injury prevention, providing a set of simple and practical rules for evaluating and interpreting the complex relations between injury risk and training performance in professional soccer.},
}

@article{1037,
  author   = {Furletti, Barbara and Trasarti, Roberto and Cintia, Paolo and Gabrielli, Lorenzo},
  title    = {Discovering and Understanding City Events with {Big Data}: The Case of {Rome}},
  journal  = {Information},
  volume   = {8},
  number   = {3},
  year     = {2017},
  month    = jun,
  pages    = {74},
  doi      = {10.3390/info8030074},
  url      = {https://doi.org/10.3390/info8030074},
  abstract = {The increasing availability of large amounts of data and digital footprints has given rise to ambitious research challenges in many fields, which spans from medical research, financial and commercial world, to people and environmental monitoring. Whereas traditional data sources and census fail in capturing actual and up-to-date behaviors, Big Data integrate the missing knowledge providing useful and hidden information to analysts and decision makers. With this paper, we focus on the identification of city events by analyzing mobile phone data (Call Detail Record), and we study and evaluate the impact of these events over the typical city dynamics. We present an analytical process able to discover, understand and characterize city events from Call Detail Record, designing a distributed computation to implement Sociometer, that is a profiling tool to categorize phone users. The methodology provides an useful tool for city mobility manager to manage the events and taking future decisions on specific classes of users, i.e., residents, commuters and tourists.},
}

@article{1012,
  author   = {Pappalardo, Luca and Cintia, Paolo},
  title    = {Quantifying the relation between performance and success in soccer},
  journal  = {Advances in Complex Systems},
  year     = {2017},
  pages    = {1750014},
  doi      = {10.1142/S021952591750014X},
  url      = {http://www.worldscientific.com/doi/abs/10.1142/S021952591750014X},
  abstract = {The availability of massive data about sports activities offers nowadays the opportunity to quantify the relation between performance and success. In this study, we analyze more than 6000 games and 10 million events in six European leagues and investigate this relation in soccer competitions. We discover that a team{\textquoteright}s position in a competition{\textquoteright}s final ranking is significantly related to its typical performance, as described by a set of technical features extracted from the soccer data. Moreover, we find that, while victory and defeats can be explained by the team{\textquoteright}s performance during a game, it is difficult to detect draws by using a machine learning approach. We then simulate the outcomes of an entire season of each league only relying on technical data and exploiting a machine learning model trained on data from past seasons. The simulation produces a team ranking which is similar to the actual ranking, suggesting that a complex systems{\textquoteright} view on soccer has the potential of revealing hidden patterns regarding the relation between performance and success.},
}

% NOTE(review): entry 770 is typed as a whole-proceedings record but carries
% authors and no booktitle; it looks like a conference paper. Standard styles may
% not print the author list for this type -- confirm the venue, add a booktitle
% and retype as inproceedings.
@proceedings{770,
  author   = {Cintia, Paolo and Pappalardo, Luca and Pedreschi, Dino and Giannotti, Fosca and Malvaldi, Marco},
  title    = {The harsh rule of the goals: data-driven performance indicators for football teams},
  year     = {2015},
  url      = {https://www.researchgate.net/profile/Luca_Pappalardo/publication/281318318_The_harsh_rule_of_the_goals_data-driven_performance_indicators_for_football_teams/links/561668e308ae37cfe4090a5d.pdf},
  abstract = {Sports analytics in general, and football (soccer in USA) analytics in particular, have evolved in recent years in an amazing way, thanks to automated or semi-automated sensing technologies that provide high-fidelity data streams extracted from every game. In this paper we propose a data-driven approach and show that there is a large potential to boost the understanding of football team performance. From observational data of football games we extract a set of pass-based performance indicators and summarize them in the H indicator. We observe a strong correlation among the proposed indicator and the success of a team, and therefore perform a simulation on the four major European championships (78 teams, almost 1500 games). The outcome of each game in the championship was replaced by a synthetic outcome (win, loss or draw) based on the performance indicators computed for each team. We found that the final rankings in the simulated championships are very close to the actual rankings in the real championships, and show that teams with high ranking error show extreme values of a defense/attack efficiency measure, the Pezzali score. Our results are surprising given the simplicity of the proposed indicators, suggesting that a complex systems{\textquoteright} view on football data has the potential of revealing hidden patterns and behavior of superior quality.},
}

% Entries 824 and 575 are chapters in collected volumes (they carry a booktitle),
% so they use the incollection type, which prints both title and booktitle.
@incollection{824,
  author       = {Guidotti, Riccardo and Cintia, Paolo},
  title        = {Towards a Boosted Route Planner Using Individual Mobility Models},
  booktitle    = {Software Engineering and Formal Methods},
  year         = {2015},
  pages        = {108--123},
  publisher    = {Springer Berlin Heidelberg},
  organization = {Springer Berlin Heidelberg},
}

@inproceedings{727,
  author    = {Cintia, Paolo and Pappalardo, Luca and Pedreschi, Dino},
  title     = {Mining efficient training patterns of non-professional cyclists},
  booktitle = {22nd Italian Symposium on Advanced Database Systems, {SEBD} 2014, Sorrento Coast, Italy, June 16-18, 2014},
  year      = {2014},
}

@incollection{575,
  author       = {Nanni, Mirco and Trasarti, Roberto and Cintia, Paolo and Furletti, Barbara and Renso, Chiara and Gabrielli, Lorenzo and Rinzivillo, S and Giannotti, Fosca},
  title        = {Mobility Profiling},
  booktitle    = {Data Science and Simulation in Transportation Research},
  year         = {2014},
  pages        = {1--29},
  publisher    = {IGI Global},
  organization = {IGI Global},
  chapter      = {1},
  doi          = {10.4018/978-1-4666-4920-0.ch001},
  abstract     = {The ability to understand the dynamics of human mobility is crucial for tasks like urban planning and transportation management. The recent rapidly growing availability of large spatio-temporal datasets gives us the possibility to develop sophisticated and accurate analysis methods and algorithms that can enable us to explore several relevant mobility phenomena: the distinct access paths to a territory, the groups of persons that move together in space and time, the regions of a territory that contains a high density of traffic demand, etc. All these paradigmatic perspectives focus on a collective view of the mobility where the interesting phenomenon is the result of the contribution of several moving objects. In this chapter, the authors explore a different approach to the topic and focus on the analysis and understanding of relevant individual mobility habits in order to assign a profile to an individual on the basis of his/her mobility. This process adds a semantic level to the raw mobility data, enabling further analyses that require a deeper understanding of the data itself. The studies described in this chapter are based on two large datasets of spatio-temporal data, originated, respectively, from GPS-equipped devices and from a mobile phone network.},
}

@inproceedings{729,
  author    = {Cintia, Paolo and Pappalardo, Luca and Pedreschi, Dino},
  title     = {{``Engine Matters''}: {A} First Large Scale Data Driven Study on Cyclists{\textquoteright} Performance},
  booktitle = {13th {IEEE} International Conference on Data Mining Workshops, {ICDM} Workshops, TX, USA, December 7-10, 2013},
  year      = {2013},
  doi       = {10.1109/ICDMW.2013.41},
  url       = {http://dx.doi.org/10.1109/ICDMW.2013.41},
}

@inproceedings{684,
  author    = {Cintia, Paolo and Trasarti, Roberto and de Mac{\^e}do, Jos{\'e} Ant{\^o}nio Fernandes and Almada, Livia and Fereira, Camila},
  title     = {A Gravity Model for Speed Estimation over Road Network},
  booktitle = {2013 {IEEE} 14th International Conference on Mobile Data Management, Milan, Italy, June 3-6, 2013 - Volume 2},
  year      = {2013},
  doi       = {10.1109/MDM.2013.83},
  url       = {http://dx.doi.org/10.1109/MDM.2013.83},
}

% NOTE(review): in entry 537 the exported title ended with the stray token
% "UrbComp", which reads as the workshop name; it has been moved to the front of
% booktitle -- confirm against the workshop programme.
@inproceedings{537,
  author    = {Cintia, Paolo and Furletti, Barbara and Renso, Chiara},
  title     = {Inferring human activities from {GPS} tracks},
  booktitle = {{UrbComp} Workshop at {KDD} 2013},
  year      = {2013},
  address   = {Chicago, USA},
}