@comment{
  Conventions used in this file:
  - citation keys are stable identifiers and must not be renamed;
  - names are stored as "Last, First", separated by " and ";
  - page ranges use "--";
  - DOIs are stored bare in the doi field, without a duplicate resolver URL;
  - chapters in edited books use the incollection type so that booktitle is honoured.
}

@inproceedings{1053,
  author    = {Giannotti, Fosca and Trasarti, Roberto and Bontcheva, Kalina and Grossi, Valerio},
  title     = {{SoBigData}: Social Mining \& Big Data Ecosystem},
  booktitle = {Companion of The Web Conference 2018},
  year      = {2018},
  publisher = {International World Wide Web Conferences Steering Committee},
  url       = {http://www.sobigdata.eu/sites/default/files/www\%202018.pdf},
  abstract  = {One of the most pressing and fascinating challenges scientists face today, is understanding the complexity of our globally interconnected society. The big data arising from the digital breadcrumbs of human activities has the potential of providing a powerful social microscope, which can help us understand many complex and hidden socio-economic phenomena. Such challenge requires high-level analytics, modeling and reasoning across all the social dimensions above. There is a need to harness these opportunities for scientific advancement and for the social good, compared to the currently prevalent exploitation of big data for commercial purposes or, worse, social control and surveillance. The main obstacle to this accomplishment, besides the scarcity of data scientists, is the lack of a large-scale open ecosystem where big data and social mining research can be carried out. The SoBigData Research Infrastructure (RI) provides an integrated ecosystem for ethic-sensitive scientific discoveries and advanced applications of social data mining on the various dimensions of social life as recorded by "big data". The research community uses the SoBigData facilities as a "secure digital wind-tunnel" for large-scale social data analysis and simulation experiments. SoBigData promotes repeatable and open science and supports data science research projects by providing: i) an ever-growing, distributed data ecosystem for procurement, access and curation and management of big social data, to underpin social data mining research within an ethic-sensitive context; ii) an ever-growing, distributed platform of interoperable, social data mining methods and associated skills: tools, methodologies and services for mining, analysing, and visualising complex and massive datasets, harnessing the techno-legal barriers to the ethically safe deployment of big data for social mining; iii) an ecosystem where protection of personal information and the respect for fundamental human rights can coexist with a safe use of the same information for scientific purposes of broad and central societal interest. SoBigData has a dedicated ethical and legal board, which is implementing a legal and ethical framework.},
}

@article{896,
  author   = {Candela, Leonardo and Manghi, Paolo and Giannotti, Fosca and Grossi, Valerio and Trasarti, Roberto},
  title    = {{HyWare}: a {HYbrid} {Workflow} {lAnguage} for {Research} {E-infrastructures}},
  journal  = {D-Lib Magazine},
  volume   = {23},
  number   = {1/2},
  year     = {2017},
  doi      = {10.1045/january2017-candela},
  abstract = {Research e-infrastructures are "systems of systems", patchworks of tools, services and data sources, evolving over time to address the needs of the scientific process. Accordingly, in such environments, researchers implement their scientific processes by means of workflows made of a variety of actions, including for example usage of web services, download and execution of shared software libraries or tools, or local and manual manipulation of data. Although scientists may benefit from sharing their scientific process, the heterogeneity underpinning e-infrastructures hinders their ability to represent, share and eventually reproduce such workflows. This work presents HyWare, a language for representing scientific process in highly-heterogeneous e-infrastructures in terms of so-called hybrid workflows. HyWare lays in between "business process modeling languages", which offer a formal and high-level description of a reasoning, protocol, or procedure, and "workflow execution languages", which enable the fully automated execution of a sequence of computational steps via dedicated engines.},
}

@article{966,
  author   = {Grossi, Valerio and Romei, Andrea and Turini, Franco},
  title    = {Survey on using constraints in data mining},
  journal  = {Data Mining and Knowledge Discovery},
  volume   = {31},
  number   = {2},
  pages    = {424--464},
  year     = {2017},
  doi      = {10.1007/s10618-016-0480-z},
  abstract = {This paper provides an overview of the current state-of-the-art on using constraints in knowledge discovery and data mining. The use of constraints in a data mining task requires specific definition and satisfaction tools during knowledge extraction. This survey proposes three groups of studies based on classification, clustering and pattern mining, whether the constraints are on the data, the models or the measures, respectively. We consider the distinctions between hard and soft constraint satisfaction, and between the knowledge extraction phases where constraints are considered. In addition to discussing how constraints can be used in data mining, we show how constraint-based languages can be used throughout the data mining process.},
}

@incollection{965,
  author    = {Grossi, Valerio and Pedreschi, Dino and Turini, Franco},
  title     = {Data Mining and Constraints: An Overview},
  booktitle = {Data Mining and Constraint Programming: Foundations of a Cross-Disciplinary Approach},
  pages     = {25--48},
  year      = {2016},
  publisher = {Springer International Publishing},
  doi       = {10.1007/978-3-319-50137-6_2},
  abstract  = {This paper provides an overview of the current state-of-the-art on using constraints in knowledge discovery and data mining. The use of constraints requires mechanisms for defining and evaluating them during the knowledge extraction process. We give a structured account of three main groups of constraints based on the specific context in which they are defined and used. The aim is to provide a complete view on constraints as a building block of data mining methods.},
}

@article{875,
  author   = {Nanni, Mirco and Trasarti, Roberto and Monreale, Anna and Grossi, Valerio and Pedreschi, Dino},
  title    = {Driving Profiles Computation and Monitoring for Car Insurance {CRM}},
  journal  = {ACM Transactions on Intelligent Systems and Technology},
  volume   = {8},
  number   = {1},
  pages    = {14:1--14:26},
  year     = {2016},
  doi      = {10.1145/2912148},
  abstract = {Customer segmentation is one of the most traditional and valued tasks in customer relationship management (CRM). In this article, we explore the problem in the context of the car insurance industry, where the mobility behavior of customers plays a key role: Different mobility needs, driving habits, and skills imply also different requirements (level of coverage provided by the insurance) and risks (of accidents). In the present work, we describe a methodology to extract several indicators describing the driving profile of customers, and we provide a clustering-oriented instantiation of the segmentation problem based on such indicators. Then, we consider the availability of a continuous flow of fresh mobility data sent by the circulating vehicles, aiming at keeping our segments constantly up to date. We tackle a major scalability issue that emerges in this context when the number of customers is large---namely, the communication bottleneck---by proposing and implementing a sophisticated distributed monitoring solution that reduces communications between vehicles and company servers to the essential. We validate the framework on a large database of real mobility data coming from GPS devices on private cars. Finally, we analyze the privacy risks that the proposed approach might involve for the users, providing and evaluating a countermeasure based on data perturbation.},
}

@incollection{877,
  author    = {Grossi, Valerio and Guns, Tias and Monreale, Anna and Nanni, Mirco and Nijssen, Siegfried},
  title     = {Partition-Based Clustering Using Constraint Optimization},
  booktitle = {Data Mining and Constraint Programming: Foundations of a Cross-Disciplinary Approach},
  pages     = {282--299},
  year      = {2016},
  publisher = {Springer International Publishing},
  doi       = {10.1007/978-3-319-50137-6_11},
  abstract  = {Partition-based clustering is the task of partitioning a dataset in a number of groups of examples, such that examples in each group are similar to each other. Many criteria for what constitutes a good clustering have been identified in the literature; furthermore, the use of additional constraints to find more useful clusterings has been proposed. In this chapter, it will be shown that most of these clustering tasks can be formalized using optimization criteria and constraints. We demonstrate how a range of clustering tasks can be modelled in generic constraint programming languages with these constraints and optimization criteria. Using the constraint-based modeling approach we also relate the DBSCAN method for density-based clustering to the label propagation technique for community discovery.},
}

@inproceedings{878,
  author    = {Grossi, Valerio and Monreale, Anna and Nanni, Mirco and Pedreschi, Dino and Turini, Franco},
  title     = {Clustering Formulation Using Constraint Optimization},
  booktitle = {Software Engineering and Formal Methods -- {SEFM} 2015 Collocated Workshops: ATSE, HOFM, MoKMaSD, and VERY*SCART, York, UK, September 7--8, 2015, Revised Selected Papers},
  year      = {2015},
  publisher = {Springer Berlin Heidelberg},
  doi       = {10.1007/978-3-662-49224-6_9},
  abstract  = {The problem of clustering a set of data is a textbook machine learning problem, but at the same time, at heart, a typical optimization problem. Given an objective function, such as minimizing the intra-cluster distances or maximizing the inter-cluster distances, the task is to find an assignment of data points to clusters that achieves this objective. In this paper, we present a constraint programming model for a centroid based clustering and one for a density based clustering. In particular, as a key contribution, we show how the expressivity introduced by the formulation of the problem by constraint programming makes the standard problem easy to be extended with other constraints that permit to generate interesting variants of the problem. We show this important aspect in two different ways: first, we show how the formulation of the density-based clustering by constraint programming makes it very similar to the label propagation problem and then, we propose a variant of the standard label propagation approach.},
}

@inproceedings{GrossiRR08,
  author    = {Grossi, Valerio and Romei, Andrea and Ruggieri, Salvatore},
  title     = {A Case Study in Sequential Pattern Mining for {IT}-Operational Risk},
  booktitle = {ECML/PKDD (1)},
  pages     = {424--439},
  year      = {2008},
}

@incollection{472,
  author    = {Bacciu, Davide and Bellandi, Andrea and Furletti, Barbara and Grossi, Valerio and Romei, Andrea},
  title     = {Discovering Strategic Behaviour in Multi-Agent Scenarios by Ontology-Driven Mining},
  booktitle = {Advances in Robotics, Automation and Control},
  year      = {2008},
  isbn      = {978-953-7619-16-9},
  url       = {http://www.intechopen.com/books/advances_in_robotics_automation_and_control/discovering_strategic_behaviors_in_multi-agent_scenarios_by_ontology-driven_mining},
}

@inproceedings{469,
  author    = {Furletti, Barbara and Bellandi, Andrea and Grossi, Valerio and Romei, Andrea},
  title     = {Ontological Support for Association Rule Mining},
  booktitle = {IASTED International Conference on Artificial Intelligence and Applications (AIA)},
  year      = {2008},
  address   = {Innsbruck, Austria},
}

@inproceedings{468,
  author    = {Furletti, Barbara and Bellandi, Andrea and Grossi, Valerio and Romei, Andrea},
  title     = {Ontology-Driven Association Rule Extraction: A Case Study},
  booktitle = {International Workshop on Contexts and Ontologies: Representation and Reasoning},
  year      = {2007},
  address   = {Roskilde, Denmark},
  url       = {http://ceur-ws.org/Vol-298/paper1.pdf},
}

@inproceedings{467,
  author    = {Furletti, Barbara and Bellandi, Andrea and Romei, Andrea and Grossi, Valerio},
  title     = {Pushing Constraints in Association Rule Mining: An Ontology-Based Approach},
  booktitle = {IADIS International Conference WWW/Internet 2007},
  year      = {2007},
  isbn      = {978-972-8924-44-7},
  url       = {http://www.iadisportal.org/digital-library/mdownload/pushing-constraints-in-association-rule-mining-an-ontology-based-approach},
}