% Publication list, newest first (2021 down to 2013).
% Conventions: one field per line; names in "Last, First" form; bare DOIs
% (no resolver prefix); month as an unquoted three-letter macro; page ranges
% written with a double hyphen; the long abstract field is kept last.

@article{1400,
  author   = {Nanni, Mirco and Andrienko, Gennady and Barabasi, Albert-Laszlo and Boldrini, Chiara and Bonchi, Francesco and Cattuto, Ciro and Chiaromonte, Francesca and Comand{\'e}, Giovanni and Conti, Marco and Cot{\'e}, Mark and Dignum, Frank and Dignum, Virginia and Domingo-Ferrer, Josep and Ferragina, Paolo and Giannotti, Fosca and Guidotti, Riccardo and Helbing, Dirk and Kaski, Kimmo and Kert{\'e}sz, J{\'a}nos and Lehmann, Sune and Lepri, Bruno and Lukowicz, Paul and Matwin, Stan and Meg{\'\i}as Jim{\'e}nez, David and Monreale, Anna and Morik, Katharina and Oliver, Nuria and Passarella, Andrea and Passerini, Andrea and Pedreschi, Dino and Pentland, Alex and Pianesi, Fabio and Pratesi, Francesca and Rinzivillo, S. and Ruggieri, Salvatore and Siebes, Arno and Torra, Vicenc and Trasarti, Roberto and van den Hoven, Jeroen and Vespignani, Alessandro},
  title    = {Give more data, awareness and control to individual citizens, and they will help {COVID-19} containment},
  journal  = {Ethics and Information Technology},
  year     = {2021},
  month    = feb,
  issn     = {1572-8439},
  doi      = {10.1007/s10676-020-09572-w},
  url      = {https://link.springer.com/article/10.1007/s10676-020-09572-w},
  abstract = {The rapid dynamics of COVID-19 calls for quick and effective tracking of virus transmission chains and early detection of outbreaks, especially in the {\textquotedblleft}phase 2{\textquotedblright} of the pandemic, when lockdown and other restriction measures are progressively withdrawn, in order to avoid or minimize contagion resurgence. For this purpose, contact-tracing apps are being proposed for large scale adoption by many countries. A centralized approach, where data sensed by the app are all sent to a nation-wide server, raises concerns about citizens{\textquoteright} privacy and needlessly strong digital surveillance, thus alerting us to the need to minimize personal data collection and avoiding location tracking. We advocate the conceptual advantage of a decentralized approach, where both contact and location data are collected exclusively in individual citizens{\textquoteright} {\textquotedblleft}personal data stores{\textquotedblright}, to be shared separately and selectively (e.g., with a backend system, but possibly also with other citizens), voluntarily, only when the citizen has tested positive for COVID-19, and with a privacy preserving level of granularity. This approach better protects the personal sphere of citizens and affords multiple benefits: it allows for detailed information gathering for infected people in a privacy-preserving fashion; and, in turn this enables both contact tracing, and, the early detection of outbreak hotspots on more finely-granulated geographic scale. The decentralized approach is also scalable to large populations, in that only the data of positive patients need be handled at a central level. Our recommendation is two-fold. First to extend existing decentralized architectures with a light touch, in order to manage the collection of location data locally on the device, and allow the user to share spatio-temporal aggregates{\textemdash}if and when they want and for specific aims{\textemdash}with health authorities, for instance. Second, we favour a longer-term pursuit of realizing a Personal Data Store vision, giving users the opportunity to contribute to collective good in the measure they want, enhancing self-awareness, and cultivating collective efforts for rebuilding society.},
}

@article{1405,
  author   = {Forg{\'o}, Nikolaus and H{\"a}nold, Stefanie and van den Hoven, Jeroen and Kr{\"u}gel, Tina and Lishchuk, Iryna and Mahieu, Ren{\'e} and Monreale, Anna and Pedreschi, Dino and Pratesi, Francesca and van Putten, David},
  title    = {An ethico-legal framework for social data science},
  journal  = {International Journal of Data Science and Analytics},
  year     = {2020},
  month    = mar,
  issn     = {2364-4168},
  doi      = {10.1007/s41060-020-00211-7},
  url      = {https://link.springer.com/article/10.1007/s41060-020-00211-7},
  abstract = {This paper presents a framework for research infrastructures enabling ethically sensitive and legally compliant data science in Europe. Our goal is to describe how to design and implement an open platform for big data social science, including, in particular, personal data. To this end, we discuss a number of infrastructural, organizational and methodological principles to be developed for a concrete implementation. These include not only systematically tools and methodologies that effectively enable both the empirical evaluation of the privacy risk and data transformations by using privacy-preserving approaches, but also the development of training materials (a massive open online course) and organizational instruments based on legal and ethical principles. This paper provides, by way of example, the implementation that was adopted within the context of the SoBigData Research Infrastructure.},
}

@article{1404,
  author   = {Sirbu, Alina and Andrienko, Gennady and Andrienko, Natalia and Boldrini, Chiara and Conti, Marco and Giannotti, Fosca and Guidotti, Riccardo and Bertoli, Simone and Kim, Jisu and Muntean, Cristina Ioana and Pappalardo, Luca and Passarella, Andrea and Pedreschi, Dino and Pollacci, Laura and Pratesi, Francesca and Sharma, Rajesh},
  title    = {Human migration: the big data perspective},
  journal  = {International Journal of Data Science and Analytics},
  pages    = {1--20},
  year     = {2020},
  month    = mar,
  issn     = {2364-4168},
  doi      = {10.1007/s41060-020-00213-5},
  url      = {https://link.springer.com/article/10.1007/s41060-020-00213-5},
  abstract = {How can big data help to understand the migration phenomenon? In this paper, we try to answer this question through an analysis of various phases of migration, comparing traditional and novel data sources and models at each phase. We concentrate on three phases of migration, at each phase describing the state of the art and recent developments and ideas. The first phase includes the journey, and we study migration flows and stocks, providing examples where big data can have an impact. The second phase discusses the stay, i.e. migrant integration in the destination country. We explore various data sets and models that can be used to quantify and understand migrant integration, with the final aim of providing the basis for the construction of a novel multi-level integration index. The last phase is related to the effects of migration on the source countries and the return of migrants.},
}

@article{1421,
  author   = {Pratesi, Francesca and Gabrielli, Lorenzo and Cintia, Paolo and Monreale, Anna and Giannotti, Fosca},
  title    = {{PRIMULE}: Privacy risk mitigation for user profiles},
  journal  = {Data \& Knowledge Engineering},
  volume   = {125},
  pages    = {101786},
  year     = {2020},
  month    = jan,
  issn     = {0169-023X},
  doi      = {10.1016/j.datak.2019.101786},
  url      = {https://www.sciencedirect.com/science/article/pii/S0169023X18305342},
  abstract = {The availability of mobile phone data has encouraged the development of different data-driven tools, supporting social science studies and providing new data sources to the standard official statistics. However, this particular kind of data are subject to privacy concerns because they can enable the inference of personal and private information. In this paper, we address the privacy issues related to the sharing of user profiles, derived from mobile phone data, by proposing PRIMULE, a privacy risk mitigation strategy. Such a method relies on PRUDEnce (Pratesi et~al., 2018), a privacy risk assessment framework that provides a methodology for systematically identifying risky-users in a set of data. An extensive experimentation on real-world data shows the effectiveness of PRIMULE strategy in terms of both quality of mobile user profiles and utility of these profiles for analytical services such as the Sociometer (Furletti et~al., 2013), a data mining tool for city users classification.},
}

@inproceedings{1197,
  author    = {Pellungrini, Roberto and Pappalardo, Luca and Pratesi, Francesca and Monreale, Anna},
  title     = {Analyzing Privacy Risk in Human Mobility Data},
  booktitle = {Software Technologies: Applications and Foundations - STAF 2018 Collocated Workshops, Toulouse, France, June 25-29, 2018, Revised Selected Papers},
  year      = {2018},
  doi       = {10.1007/978-3-030-04771-9_10},
  url       = {https://doi.org/10.1007/978-3-030-04771-9_10},
  abstract  = {Mobility data are of fundamental importance for understanding the patterns of human movements, developing analytical services and modeling human dynamics. Unfortunately, mobility data also contain individual sensitive information, making it necessary an accurate privacy risk assessment for the individuals involved. In this paper, we propose a methodology for assessing privacy risk in human mobility data. Given a set of individual and collective mobility features, we define the minimum data format necessary for the computation of each feature and we define a set of possible attacks on these data formats. We perform experiments computing the empirical risk in a real-world mobility dataset, and show how the distributions of the considered mobility features are affected by the removal of individuals with different levels of privacy risk.},
}

@incollection{1422,
  author    = {Amato, G. and Candela, L. and Castelli, D. and Esuli, A. and Falchi, F. and Gennaro, C. and Giannotti, Fosca and Monreale, Anna and Nanni, Mirco and Pagano, P. and Pappalardo, Luca and Pedreschi, Dino and Pratesi, Francesca and Rabitti, F. and Rinzivillo, S. and Rossetti, Giulio and Ruggieri, Salvatore and Sebastiani, F. and Tesconi, M.},
  editor    = {Flesca, Sergio and Greco, Sergio and Masciari, Elio and Sacc{\`a}, Domenico},
  title     = {How Data Mining and Machine Learning Evolved from Relational Data Base to Data Science},
  booktitle = {A Comprehensive Guide Through the Italian Database Research Over the Last 25 Years},
  pages     = {287--306},
  year      = {2018},
  publisher = {Springer International Publishing},
  address   = {Cham},
  isbn      = {978-3-319-61893-7},
  doi       = {10.1007/978-3-319-61893-7_17},
  url       = {https://link.springer.com/chapter/10.1007/978-3-319-61893-7_17},
  abstract  = {During the last 35 years, data management principles such as physical and logical independence, declarative querying and cost-based optimization have led to profound pervasiveness of relational databases in any kind of organization. More importantly, these technical advances have enabled the first round of business intelligence applications and laid the foundation for managing and analyzing Big Data today.},
}

@article{1138,
  author   = {Pratesi, Francesca and Monreale, Anna and Trasarti, Roberto and Giannotti, Fosca and Pedreschi, Dino and Yanagihara, Tadashi},
  title    = {{PRUDEnce}: a system for assessing privacy risk vs utility in data sharing ecosystems},
  journal  = {Transactions on Data Privacy},
  volume   = {11},
  number   = {2},
  year     = {2018},
  month    = aug,
  url      = {http://www.tdp.cat/issues16/tdp.a284a17.pdf},
  abstract = {Data describing human activities are an important source of knowledge useful for understanding individual and collective behavior and for developing a wide range of user services. Unfortunately, this kind of data is sensitive, because people{\textquoteright}s whereabouts may allow re-identification of individuals in a de-identified database. Therefore, Data Providers, before sharing those data, must apply any sort of anonymization to lower the privacy risks, but they must be aware and capable of controlling also the data quality, since these two factors are often a trade-off. In this paper we propose PRUDEnce (Privacy Risk versus Utility in Data sharing Ecosystems), a system enabling a privacy-aware ecosystem for sharing personal data. It is based on a methodology for assessing both the empirical (not theoretical) privacy risk associated to users represented in the data, and the data quality guaranteed only with users not at risk. Our proposal is able to support the Data Provider in the exploration of a repertoire of possible data transformations with the aim of selecting one specific transformation that yields an adequate trade-off between data quality and privacy risk. We study the practical effectiveness of our proposal over three data formats underlying many services, defined on real mobility data, i.e., presence data, trajectory data and road segment data.},
}

@inproceedings{1198,
  author    = {Pellungrini, Roberto and Pratesi, Francesca and Pappalardo, Luca},
  title     = {Assessing Privacy Risk in Retail Data},
  booktitle = {Personal Analytics and Privacy. An Individual and Collective Perspective - First International Workshop, PAP 2017, Held in Conjunction with ECML PKDD 2017, Skopje, Macedonia, September 18, 2017, Revised Selected Papers},
  year      = {2017},
  doi       = {10.1007/978-3-319-71970-2_3},
  url       = {https://doi.org/10.1007/978-3-319-71970-2_3},
  abstract  = {Retail data are one of the most requested commodities by commercial companies. Unfortunately, from this data it is possible to retrieve highly sensitive information about individuals. Thus, there exists the need for accurate individual privacy risk evaluation. In this paper, we propose a methodology for assessing privacy risk in retail data. We define the data formats for representing retail data, the privacy framework for calculating privacy risk and some possible privacy attacks for this kind of data. We perform experiments in a real-world retail dataset, and show the distribution of privacy risk for the various attacks.},
}

@article{1013,
  author   = {Pellungrini, Roberto and Pappalardo, Luca and Pratesi, Francesca and Monreale, Anna},
  title    = {A Data Mining Approach to Assess Privacy Risk in Human Mobility Data},
  journal  = {ACM Transactions on Intelligent Systems and Technology},
  volume   = {9},
  number   = {3},
  pages    = {31:1--31:27},
  year     = {2017},
  issn     = {2157-6904},
  doi      = {10.1145/3106774},
  url      = {http://doi.acm.org/10.1145/3106774},
  abstract = {Human mobility data are an important proxy to understand human mobility dynamics, develop analytical services, and design mathematical models for simulation and what-if analysis. Unfortunately mobility data are very sensitive since they may enable the re-identification of individuals in a database. Existing frameworks for privacy risk assessment provide data providers with tools to control and mitigate privacy risks, but they suffer two main shortcomings: (i) they have a high computational complexity; (ii) the privacy risk must be recomputed every time new data records become available and for every selection of individuals, geographic areas, or time windows. In this article, we propose a fast and flexible approach to estimate privacy risk in human mobility data. The idea is to train classifiers to capture the relation between individual mobility patterns and the level of privacy risk of individuals. We show the effectiveness of our approach by an extensive experiment on real-world GPS data in two urban areas and investigate the relations between human mobility patterns and the privacy risk of individuals.},
}

% NOTE(review): the ISBN and DOI of the next entry suggest a chapter in a
% Springer proceedings volume, but no booktitle is recorded here, so the
% booklet type is left unchanged. Confirm the venue, add a booktitle and
% switch the type to inproceedings.
@booklet{993,
  author   = {Pellungrini, Roberto and Pappalardo, Luca and Pratesi, Francesca and Monreale, Anna},
  title    = {Fast Estimation of Privacy Risk in Human Mobility Data},
  year     = {2017},
  isbn     = {978-3-319-66283-1},
  doi      = {10.1007/978-3-319-66284-8_35},
  abstract = {Mobility data are an important proxy to understand the patterns of human movements, develop analytical services and design models for simulation and prediction of human dynamics. Unfortunately mobility data are also very sensitive, since they may contain personal information about the individuals involved. Existing frameworks for privacy risk assessment enable the data providers to quantify and mitigate privacy risks, but they suffer two main limitations: (i) they have a high computational complexity; (ii) the privacy risk must be re-computed for each new set of individuals, geographic areas or time windows. In this paper we explore a fast and flexible solution to estimate privacy risk in human mobility data, using predictive models to capture the relation between an individual{\textquoteright}s mobility patterns and her privacy risk. We show the effectiveness of our approach by experimentation on a real-world GPS dataset and provide a comparison with traditional methods.},
}

@inproceedings{1049,
  author    = {Pratesi, Francesca and Monreale, Anna and Giannotti, Fosca and Pedreschi, Dino},
  title     = {Privacy Preserving Multidimensional Profiling},
  booktitle = {International Conference on Smart Objects and Technologies for Social Good},
  year      = {2017},
  publisher = {Springer},
  doi       = {10.1007/978-3-319-76111-4_15},
  url       = {https://link.springer.com/chapter/10.1007/978-3-319-76111-4_15},
  abstract  = {Recently, big data had become central in the analysis of human behavior and the development of innovative services. In particular, a new class of services is emerging, taking advantage of different sources of data, in order to consider the multiple aspects of human beings. Unfortunately, these data can lead to re-identification problems and other privacy leaks, as diffusely reported in both scientific literature and media. The risk is even more pressing if multiple sources of data are linked together since a potential adversary could know information related to each dataset. For this reason, it is necessary to evaluate accurately and mitigate the individual privacy risk before releasing personal data. In this paper, we propose a methodology for the first task, i.e., assessing privacy risk, in a multidimensional scenario, defining some possible privacy attacks and simulating them using real-world datasets.},
}

@inproceedings{761,
  author    = {Botea, Adi and Braghin, Stefano and Lopes, Nuno and Guidotti, Riccardo and Pratesi, Francesca},
  title     = {Managing travels with {PETRA}: The {Rome} use case},
  booktitle = {2015 31st IEEE International Conference on Data Engineering Workshops (ICDEW)},
  year      = {2015},
  publisher = {IEEE},
  abstract  = {The aim of the PETRA project is to provide the basis for a city-wide transportation system that supports policies catering for both individual preferences of users and city-wide travel patterns. The PETRA platform will be initially deployed in the partner city of Rome, and later in Venice, and Tel-Aviv.},
}

@inproceedings{764,
  author    = {Berlingerio, Michele and Bicer, Veli and Botea, Adi and Braghin, Stefano and Lopes, Nuno and Guidotti, Riccardo and Pratesi, Francesca},
  title     = {Mobility Mining for Journey Planning in {Rome}},
  booktitle = {Machine Learning and Knowledge Discovery in Databases},
  year      = {2015},
  publisher = {Springer International Publishing},
  abstract  = {We present recent results on integrating private car GPS routines obtained by a Data Mining module. into the PETRA (PErsonal TRansport Advisor) platform. The routines are used as additional {\textquotedblleft}bus lines{\textquotedblright}, available to provide a ride to travelers. We present the effects of querying the planner with and without the routines, which show how Data Mining may help Smarter Cities applications.},
}

@article{EPJ14,
  author   = {Monreale, Anna and Rinzivillo, S. and Pratesi, Francesca and Giannotti, Fosca and Pedreschi, Dino},
  title    = {Privacy-by-Design in Big Data Analytics and Social Mining},
  journal  = {EPJ Data Science},
  volume   = {10},
  year     = {2014},
  doi      = {10.1140/epjds/s13688-014-0010-4},
  note     = {2014:10},
  abstract = {Privacy is ever-growing concern in our society and is becoming a fundamental aspect to take into account when one wants to use, publish and analyze data involving human personal sensitive information. Unfortunately, it is increasingly hard to transform the data in a way that it protects sensitive information: we live in the era of big data characterized by unprecedented opportunities to sense, store and analyze social data describing human activities in great detail and resolution. As a result, privacy preservation simply cannot be accomplished by de-identification alone. In this paper, we propose the privacy-by-design paradigm to develop technological frameworks for countering the threats of undesirable, unlawful effects of privacy violation, without obstructing the knowledge discovery opportunities of social mining and big data analytical technologies. Our main idea is to inscribe privacy protection into the knowledge discovery technology by design, so that the analysis incorporates the relevant privacy requirements from the start.},
}

@inproceedings{615,
  author    = {Pratesi, Francesca and Monreale, Anna and Wang, Hui Wendy and Rinzivillo, S. and Pedreschi, Dino and Andrienko, Gennady and Andrienko, Natalia},
  title     = {Privacy-Aware Distributed Mobility Data Analytics},
  booktitle = {SEBD},
  year      = {2013},
  address   = {Roccella Jonica},
  abstract  = {We propose an approach to preserve privacy in an analytical processing within a distributed setting, and tackle the problem of obtaining aggregated information about vehicle traffic in a city from movement data collected by individual vehicles and shipped to a central server. Movement data are sensitive because they may describe typical movement behaviors and therefore be used for re-identification of individuals in a database. We provide a privacy-preserving framework for movement data aggregation based on trajectory generalization in a distributed environment. The proposed solution, based on the differential privacy model and on sketching techniques for efficient data compression, provides a formal data protection safeguard. Using real-life data, we demonstrate the effectiveness of our approach also in terms of data utility preserved by the data transformation.},
}

@incollection{571,
  author    = {Monreale, Anna and Wang, Hui Wendy and Pratesi, Francesca and Rinzivillo, S. and Pedreschi, Dino and Andrienko, Gennady and Andrienko, Natalia},
  editor    = {Vandenbroucke, Danny and Bucher, B{\'e}n{\'e}dicte and Crompvoets, Joep},
  title     = {Privacy-Preserving Distributed Movement Data Aggregation},
  booktitle = {Geographic Information Science at the Heart of Europe},
  series    = {Lecture Notes in Geoinformation and Cartography},
  pages     = {225--245},
  year      = {2013},
  publisher = {Springer International Publishing},
  isbn      = {978-3-319-00614-7},
  doi       = {10.1007/978-3-319-00615-4_13},
  url       = {http://dx.doi.org/10.1007/978-3-319-00615-4_13},
  abstract  = {We propose a novel approach to privacy-preserving analytical processing within a distributed setting, and tackle the problem of obtaining aggregated information about vehicle traffic in a city from movement data collected by individual vehicles and shipped to a central server. Movement data are sensitive because people{\textquoteright}s whereabouts have the potential to reveal intimate personal traits, such as religious or sexual preferences, and may allow re-identification of individuals in a database. We provide a privacy-preserving framework for movement data aggregation based on trajectory generalization in a distributed environment. The proposed solution, based on the differential privacy model and on sketching techniques for efficient data compression, provides a formal data protection safeguard. Using real-life data, we demonstrate the effectiveness of our approach also in terms of data utility preserved by the data transformation.},
}