@conference {aoga2017jfpc,
title = {Algorithme Efficace pour la Fouille de S{\'e}quences Fr{\'e}quentes avec la Programmation par Contraintes},
booktitle = {Treizi{\`e}mes journ{\'e}es Francophones de Programmation par Contraintes - JFPC{\textquoteright}17},
year = {2017},
abstract = {La programmation par contraintes (CP) a s{\'e}duit la communaut{\'e} de fouille de donn{\'e}es gr{\^a}ce {\`a} son approche hautement d{\'e}clarative et sa flexibilit{\'e}. Cependant, ces avantages n{\textquoteright}offrent pas une garantie d{\textquoteright}efficacit{\'e}. En t{\'e}moigne la litt{\'e}rature o{\`u} les m{\'e}thodes bas{\'e}es sur la CP n{\textquoteright}ont toujours pas r{\'e}ussi {\`a} {\^e}tre aussi performante que les m{\'e}thodes sp{\'e}cialis{\'e}es. Dans ce papier publi{\'e} au ECML-PKDD{\textquoteright}16 aoga2016pkdd, nous montrons comment en combinant les techniques des deux mondes on peut arriver {\`a} une m{\'e}thode tr{\`e}s robuste et efficace en plus d{\textquoteright}{\^e}tre modulaire et flexible pour r{\'e}soudre les probl{\`e}mes de fouille de s{\'e}quences fr{\'e}quentes. Notre approche, intitul{\'e}e PPIC, est une contrainte globale, con{\c c}ue sur les m{\'e}thodes de projection de base de donn{\'e}es. Cette contrainte utilise une structure de donn{\'e}es auto-backtraquante (trailing) pour stocker et restaurer efficacement les bases de donn{\'e}es projet{\'e}es. Le calcul des supports et le filtrage reposent sur des am{\'e}liorations algorithmiques utilisant des donn{\'e}es pr{\'e}-calcul{\'e}es. Des exp{\'e}riences d{\'e}taill{\'e}es montrent comment cette approche, pour la premi{\`e}re fois, surpasse {\`a} la fois les m{\'e}thodes bas{\'e}es sur la CP et celles sp{\'e}cialis{\'e}es. Ainsi, le moindre qu{\textquoteright}on puisse dire est que le tandem fouille de donn{\'e}es et CP a encore de beaux jours devant lui.},
author = {Aoga, John O. R. and Guns, Tias and Pierre Schaus}
}
@conference {DBLP:conf/cp/SchausAG17,
title = {CoverSize: {A} Global Constraint for Frequency-Based Itemset Mining},
booktitle = {Principles and Practice of Constraint Programming: 23rd International Conference, {CP 2017}},
volume = {10416},
year = {2017},
pages = {529{\textendash}546},
publisher = {Springer International Publishing},
organization = {Springer International Publishing},
address = {Melbourne, VIC, Australia},
abstract = {Constraint Programming is becoming competitive for solving certain data-mining problems largely due to the development of global constraints. We introduce the CoverSize constraint for itemset mining problems, a global constraint for counting and constraining the number of transactions covered by the itemset decision variables. We show the relation of this constraint to the well-known table constraint, and our filtering algorithm internally uses the reversible sparse bitset data structure recently proposed for filtering table. Furthermore, we expose the size of the cover as a variable, which opens up new modelling perspectives compared to an existing global constraint for (closed) frequent itemset mining. For example, one can constrain minimum frequency or compare the frequency of an itemset in different datasets as is done in discriminative itemset mining. We demonstrate experimentally on the frequent, closed and discriminative itemset mining problems that the CoverSize constraint with reversible sparse bitsets allows to outperform other CP approaches.},
isbn = {978-3-319-66158-2},
doi = {10.1007/978-3-319-66158-2_34},
url = {https://doi.org/10.1007/978-3-319-66158-2_34},
author = {Pierre Schaus and Aoga, John O. R. and Guns, Tias}
}
@article {aoga2017constraint,
title = {Mining Time-constrained Sequential Patterns with Constraint Programming},
journal = {Constraints},
volume = {22},
year = {2017},
abstract = {Constraint Programming (CP) has proven to be an effective platform for constraint-based sequence mining. Previous work has focused on standard frequent sequence mining, as well as frequent sequence mining with a maximum {\textquoteright}gap{\textquoteright} between two matching events in a sequence. The main challenge in the latter is that this constraint can not be imposed independently of the omnipresent frequency constraint. Indeed, the gap constraint changes whether a subsequence is included in a sequence, and hence its frequency. In this work, we go beyond that and investigate the integration of timed events and constraining the minimum/maximum gap as well as minimum/maximum span. The latter constrains the allowed time between the first and last matching event of a pattern. We show how the three are interrelated, and what the required changes to the frequency constraint are. Key in our approach is the concept of an extension window defined by gap/span and we develop techniques to avoid scanning the sequences needlessly, as well as using a backtracking-aware data structure. Experiments demonstrate that the proposed approach outperforms both specialized and CP-based approaches in almost all cases and that the advantage increases as the minimum frequency threshold decreases. This paper is an extension of the original manuscript presented at CPAIOR{\textquoteright}17 aoga2017cpaior.},
keywords = {constraint programming, Data mining, Gap constraint, Global Constraint, Sequential pattern mining, Span constraint, Time constraint},
author = {Aoga, John O. R. and Guns, Tias and Pierre Schaus}
}
@conference {aoga2017cpaior,
title = {Mining Time-constrained Sequential Patterns with Constraint Programming},
booktitle = {International Conference on Integration of AI and OR Techniques in Constraint Programming for Combinatorial Optimization Problems (CPAIOR17)},
year = {2017},
abstract = {See Constraint Journal paper aoga2017constraint.},
author = {Aoga, John O. R. and Guns, Tias and Pierre Schaus}
}
@conference {aoga2016pkdd,
title = {An Efficient Algorithm for Mining Frequent Sequence with Constraint Programming},
booktitle = {Machine Learning and Knowledge Discovery in Databases: European Conference, ECML PKDD 2016, Riva del Garda, Italy, September 19-23, 2016, Proceedings, Part II},
year = {2016},
pages = {315{\textendash}330},
publisher = {Springer International Publishing},
organization = {Springer International Publishing},
address = {Cham},
abstract = {The main advantage of Constraint Programming (CP)
approaches for sequential pattern mining (SPM) is their modularity,
which includes the ability to add new constraints (regular expressions,
length restrictions, etc.). The current best CP approach for SPM uses
a global constraint (module) that computes the projected database and
enforces the minimum frequency; it does this with a filtering algorithm
similar to the PrefixSpan method. However, the resulting system is not
as scalable as some of the most advanced mining systems like Zaki{\textquoteright}s
cSPADE. We show how, using techniques from both data mining and
CP, one can use a generic constraint solver and yet outperform existing
specialized systems. This is mainly due to two improvements in the
module that computes the projected frequencies: first, computing the
projected database can be sped up by pre-computing the positions at
which a symbol can become unsupported by a sequence, thereby avoiding
to scan the full sequence each time; and second by taking inspiration
from the trailing used in CP solvers to devise a backtracking-aware
data structure that allows fast incremental storing and restoring of the
projected database. Detailed experiments show how this approach outperforms
existing CP as well as specialized systems for SPM, and that
the gain in efficiency translates directly into increased efficiency for other
settings such as mining with regular expressions. The data and software
related to this paper are available at http://sites.uclouvain.be/cp4dm/
spm/},
isbn = {978-3-319-46227-1},
doi = {10.1007/978-3-319-46227-1_20},
url = {http://dx.doi.org/10.1007/978-3-319-46227-1_20},
author = {Aoga, John O. R. and Guns, Tias and Pierre Schaus},
editor = {Frasconi, Paolo and Landwehr, Niels and Manco, Giuseppe and Vreeken, Jilles}
}