# -*- coding: utf-8 -*-
# Part of Odoo. See LICENSE file for full copyright and licensing details.

from odoo import models, api, fields, _
from odoo.models import MAGIC_COLUMNS
from odoo.osv import expression
from odoo.tools import split_every

import logging

_logger = logging.getLogger(__name__)

IGNORED_FIELDS = MAGIC_COLUMNS
DM_CRON_BATCH_SIZE = 100


class DataMergeGroup(models.Model):
    _name = 'data_merge.group'
    _description = 'Deduplication Group'
    _order = 'similarity desc'

    active = fields.Boolean(default=True)
    model_id = fields.Many2one('data_merge.model', string='Deduplication Model', ondelete='cascade', required=True)
    res_model_id = fields.Many2one(related='model_id.res_model_id', store=True, readonly=True)
    res_model_name = fields.Char(related='model_id.res_model_name', store=True, readonly=True)
    similarity = fields.Float(
        string='Similarity %', readonly=True, store=True, compute='_compute_similarity',
        help='Similarity coefficient based on the number of text fields exactly in common.')
    divergent_fields = fields.Char(compute='_compute_similarity', store=True)
    record_ids = fields.One2many('data_merge.record', 'group_id')

    @api.depends('model_id', 'similarity')
    def _compute_display_name(self):
        for group in self:
            group.display_name = _('%s - Similarity: %s%%', group.model_id.name, int(group.similarity * 100))

    def _get_similarity_fields(self):
        self.ensure_one()
        group_fields = self.env[self.res_model_name]._fields.items()
        return [name for name, field in group_fields if field.type == 'char']
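    # For instance, on res.partner this keeps char fields such as 'name',
    # 'email' and 'phone', while relational, date or numeric fields are
    # ignored (the example field names are illustrative, not from the source).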

    @api.depends('record_ids')
    def _compute_similarity(self):
        for group in self:
            if not group.record_ids:
                group.divergent_fields = ''
                group.similarity = 1
                continue

            read_fields = group._get_similarity_fields()

            record_ids = group.record_ids.mapped('res_id')
            records = self.env[group.res_model_name].browse(record_ids).read(read_fields)
            # YTI What about unaccent ? Should be taken into account IMO if the
            # rule was computed from that.
            data = set(records[0].items())
            data = data.intersection(*[set(record.items()) for record in records[1:]])

            diff_fields = set(read_fields) - {k for k, v in data}  # fields of the model minus the identical fields
            group.divergent_fields = ','.join(diff_fields)
            group.similarity = min(1, len(data) / len(read_fields))
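    # Worked example (illustrative values, not from the source): with
    # read_fields = ['name', 'email', 'phone', 'ref'] and two records that
    # share the same 'name' and 'email' but differ on 'phone' and 'ref',
    # data keeps the 2 common items, so similarity = min(1, 2 / 4) = 0.5
    # and divergent_fields contains 'phone' and 'ref'.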

    def discard_records(self, records=None):
        domain = [('group_id', '=', self.id)]

        if records is not None:
            domain = expression.AND([domain, [('id', 'in', records)]])
        self.env['data_merge.record'].search(domain).write({'is_discarded': True, 'is_master': False})
        if all(not record.active for record in self.record_ids):
            self.active = False
        self._elect_master_record()

    ###################
    ### Master Record
    ###################
    def _elect_master_record(self):
        """
        Elect the "master" record.

        This method looks for an `_elect_method()` on the functional model.
        If it exists, that method is responsible for returning the master record; otherwise, a generic method is used.
        """
        for group in self:
            if hasattr(self.env[group.res_model_name], '_elect_method'):
                elect_master = getattr(self.env[group.res_model_name], '_elect_method')
            else:
                elect_master = group._elect_method

            records = group.record_ids._original_records()
            if not records:
                return

            master = elect_master(records)
            if master:
                master_record = group.record_ids.filtered(lambda r: r.res_id == master.id)
                master_record.is_master = True

    ## Generic master
    def _elect_method(self, records):
        """
        Generic master election method.

        :param records: all the records of the duplicate group
        :return: the oldest record, as master
        """
        records_sorted = records.sorted('create_date')
        return records_sorted[0] if records_sorted else None
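    # Override sketch (hypothetical, names illustrative): a functional model
    # can provide its own election rule, e.g. preferring the most recently
    # written record instead of the oldest one:
    #
    #     def _elect_method(self, records):
    #         records_sorted = records.sorted('write_date', reverse=True)
    #         return records_sorted[0] if records_sorted else None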

    ###########
    ### Merge
    ###########
    @api.model
    def merge_multiple_records(self, group_records):
        group_ids = self.browse([int(group_id) for group_id in group_records.keys()])

        for group in group_ids:
            group.merge_records(group_records[str(group.id)])
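    # Expected payload shape, inferred from the int()/str() key handling above
    # (the example ids are illustrative):
    #     {'42': [1, 2, 3], '43': [7, 8]}
    # i.e. data_merge.group ids as string keys, each mapping to the list of
    # data_merge.record ids to merge within that group.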

    def merge_records(self, records=None):
        """
        Merge the selected records.

        This method looks for a `_merge_method()` on the model.
        If it exists, that method is responsible for merging the records; otherwise, the generic method is used.

        :param records: group records to be merged, or None if all the records should be merged
        """
        self.ensure_one()
        if records is None:
            records = []

        domain = [('group_id', '=', self.id)]
        if records:
            domain += [('id', 'in', records)]

        to_merge = self.env['data_merge.record'].with_context(active_test=False).search(domain, order='id')
        to_merge_count = len(to_merge)
        if to_merge_count <= 1:
            return
        master_record = to_merge.filtered('is_master') or to_merge[0]
        to_merge = to_merge - master_record

        if not master_record._original_records():
            _logger.warning('The master record does not exist')
            return

        _logger.info('Merging %s records %s into %s', self.res_model_name, to_merge.mapped('res_id'), master_record.res_id)

        model = self.env[self.res_model_name]
        if hasattr(model, '_merge_method'):
            merge = getattr(model, '_merge_method')
        else:
            merge = self._merge_method

        # Create a dict with chatter data, in case the merged records are deleted during the merge procedure
        chatter_data = {rec.res_id: dict(res_id=rec.res_id, merged_record=str(rec.name), changes=rec._record_snapshot()) for rec in to_merge}
        res = merge(master_record._original_records(), to_merge._original_records())
        if res.get('log_chatter'):
            self._log_merge(master_record, to_merge, chatter_data)

        if res.get('post_merge'):
            self._post_merge(master_record, to_merge)

        is_merge_action = master_record.model_id.is_contextual_merge_action
        (master_record + to_merge).unlink()

        return {
            'records_merged': res['records_merged'] if res.get('records_merged') else to_merge_count,
            # Used to get back to the functional model if deduplicate was
            # called from contextual action menu - instead of staying on
            # the deduplicate view.
            'back_to_model': is_merge_action
        }
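    # Usage sketch (assumed call pattern, for illustration):
    #     group.merge_records()           # merge every record of the group
    #     group.merge_records([7, 8, 9])  # merge only these data_merge.record ids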

    def _log_merge(self, master_record, merged_records, chatter_data):
        """
        Post a snapshot of each merged record on the master record.
        """
        if not isinstance(self.env[self.res_model_name], self.env.registry['mail.thread']):
            return

        values = {
            'res_model_label': self.res_model_id.name,
            'res_model_name': self.res_model_name,
            'res_id': master_record.res_id,
            'master_record': master_record.name,
        }
        for rec in merged_records:
            master_values = chatter_data.get(rec.res_id, {})
            master_values.update({
                'res_model_label': self.res_model_id.name,
                'res_model_name': self.res_model_name,
                'archived': rec._original_records().exists(),
            })
            if self.model_id.removal_mode == 'archive':
                rec._original_records()._message_log_with_view('data_merge.data_merge_merged', render_values=values)
            master_record._original_records()._message_log_with_view('data_merge.data_merge_main', render_values=master_values)

    ## Generic Merge
    def _merge_method(self, master, records):
        """
        Generic merge method, which "only" repoints the foreign keys from the source records to the master record.

        :param master: original record considered as the destination
        :param records: source records to be merged with the master
        :return: dict
        """
        self.env['data_merge.record']._update_foreign_keys(destination=master, source=records)

        return {
            'post_merge': True,   # Perform post merge activities
            'log_chatter': True,  # Log merge notes in the chatter
        }
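    # Override sketch (hypothetical): a functional model can define its own
    # `_merge_method(destination, source)` and take over the merge entirely,
    # returning the same flags so the caller knows what still has to happen:
    #
    #     def _merge_method(self, destination, source):
    #         # ... model-specific reconciliation ...
    #         return {'post_merge': True, 'log_chatter': True}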

    def _post_merge(self, master, records):
        """
        Perform the post merge activities, such as archiving or deleting the original records.
        """
        origins = records._original_records()
        if self.model_id.removal_mode == 'delete' or not origins._active_name:
            origins.unlink()
        else:
            origins.write({origins._active_name: False})

    ##########
    ### Cron
    ##########
    def _cron_cleanup(self, auto_commit=True):
        """ Perform cleanup activities for each data_merge.group. """
        groups = self.with_context(active_test=False).env['data_merge.group'].search([])

        for batched_groups in split_every(DM_CRON_BATCH_SIZE, groups.ids, self.with_context(active_test=False).browse):
            batched_groups._cleanup()

        if auto_commit:
            self.env.cr.commit()
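    # Illustration (the record counts are assumed): with 250 groups and
    # DM_CRON_BATCH_SIZE = 100, split_every yields recordset batches of
    # 100, 100 and 50 groups; each batch is cleaned in turn and the cursor
    # is committed once at the end when auto_commit is True.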

    def _cleanup(self):
        """
        Do the cleanup; it deletes:
            - merged data_merge.record
            - data_merge.record with an archived or deleted original record
            - data_merge.group with 0 or 1 data_merge.record
        """
        records_to_delete = self.env['data_merge.record']
        groups_to_delete = self.env['data_merge.group']

        for group in self:
            # Count the records kept per group and whether there are discarded records
            records_discarded = False
            records_kept = 0

            # Delete records no longer existing
            original_records = {r.id: r for r in group.record_ids._original_records()} if group.record_ids else {}
            # Delete the group if all the original records in it have been deleted
            if not original_records:
                groups_to_delete += group
                continue

            for rec in group.record_ids:
                original_record = original_records.get(rec.res_id)
                if not original_record:
                    records_to_delete += rec
                    continue

                origin_inactive = (original_record._active_name and not original_record[original_record._active_name])
                if origin_inactive:
                    records_to_delete += rec
                    continue

                records_discarded = records_discarded or rec.is_discarded
                if not rec.is_discarded:
                    records_kept += 1

            # Delete groups with at most 1 record and no discarded records
            if not records_discarded and records_kept <= 1:
                groups_to_delete += group

            # Delete the single non-discarded record in groups with discarded record(s)
            if records_discarded and records_kept == 1:
                records_to_delete += group.record_ids.filtered(lambda r: not r.is_discarded)

        records_to_delete.unlink()
        groups_to_delete.unlink()