musicdiff.comparison

View Source

   1# ------------------------------------------------------------------------------
   2# Purpose:       comparison is a music comparison package for use by musicdiff.
   3#                musicdiff is a package for comparing music scores using music21.
   4#
   5# Authors:       Greg Chapman <gregc@mac.com>
   6#                musicdiff is derived from:
   7#                   https://github.com/fosfrancesco/music-score-diff.git
   8#                   by Francesco Foscarin <foscarin.francesco@gmail.com>
   9#
  10# Copyright:     (c) 2022-2025 Francesco Foscarin, Greg Chapman
  11# License:       MIT, see LICENSE
  12# ------------------------------------------------------------------------------
  13
  14__docformat__ = "google"
  15
  16import copy
  17from collections import namedtuple
  18from difflib import ndiff
  19from pathlib import Path
  20
  21# import typing as t
  22import numpy as np
  23
  24from music21.common import OffsetQL
  25from musicdiff.annotation import AnnScore, AnnNote, AnnVoice, AnnExtra, AnnLyric
  26from musicdiff.annotation import AnnStaffGroup, AnnMetadataItem
  27from musicdiff import M21Utils
  28
  29class EvaluationMetrics:
  30    def __init__(
  31        self,
  32        gt_path: Path,
  33        pred_path: Path,
  34        gt_numsyms: int,
  35        pred_numsyms: int,
  36        omr_edit_distance: int,
  37        edit_distances_dict: dict[str, int],
  38        omr_ned: float
  39    ):
  40        self.gt_path: Path = gt_path
  41        self.pred_path: Path = pred_path
  42        self.gt_numsyms: int = gt_numsyms
  43        self.pred_numsyms: int = pred_numsyms
  44        self.omr_edit_distance: int = omr_edit_distance
  45        self.edit_distances_dict: dict[str, int] = edit_distances_dict
  46        self.omr_ned: float = omr_ned
  47
  48# memoizers to speed up the recursive computation
  49def _memoize_notes_set_distance(func):
  50    def memoizer(original, compare_to):
  51        key = repr(original) + repr(compare_to)
  52        if key not in Comparison._memoizer_mem:
  53            Comparison._memoizer_mem[key] = func(original, compare_to)
  54        return copy.deepcopy(Comparison._memoizer_mem[key])
  55
  56    return memoizer
  57
  58def _memoize_extras_set_distance(func):
  59    def memoizer(original, compare_to):
  60        key = repr(original) + repr(compare_to)
  61        if key not in Comparison._memoizer_mem:
  62            Comparison._memoizer_mem[key] = func(original, compare_to)
  63        return copy.deepcopy(Comparison._memoizer_mem[key])
  64
  65    return memoizer
  66
  67def _memoize_staff_groups_set_distance(func):
  68    def memoizer(original, compare_to):
  69        key = repr(original) + repr(compare_to)
  70        if key not in Comparison._memoizer_mem:
  71            Comparison._memoizer_mem[key] = func(original, compare_to)
  72        return copy.deepcopy(Comparison._memoizer_mem[key])
  73
  74    return memoizer
  75
  76def _memoize_metadata_items_set_distance(func):
  77    def memoizer(original, compare_to):
  78        key = repr(original) + repr(compare_to)
  79        if key not in Comparison._memoizer_mem:
  80            Comparison._memoizer_mem[key] = func(original, compare_to)
  81        return copy.deepcopy(Comparison._memoizer_mem[key])
  82
  83    return memoizer
  84
  85def _memoize_inside_bars_diff_lin(func):
  86    def memoizer(original, compare_to):
  87        key = repr(original) + repr(compare_to)
  88        if key not in Comparison._memoizer_mem:
  89            Comparison._memoizer_mem[key] = func(original, compare_to)
  90        return copy.deepcopy(Comparison._memoizer_mem[key])
  91
  92    return memoizer
  93
  94def _memoize_lyrics_diff_lin(func):
  95    def memoizer(original, compare_to):
  96        key = repr(original) + repr(compare_to)
  97        if key not in Comparison._memoizer_mem:
  98            Comparison._memoizer_mem[key] = func(original, compare_to)
  99        return copy.deepcopy(Comparison._memoizer_mem[key])
 100
 101    return memoizer
 102
 103def _memoize_block_diff_lin(func):
 104    def memoizer(original, compare_to):
 105        key = repr(original) + repr(compare_to)
 106        if key not in Comparison._memoizer_mem:
 107            Comparison._memoizer_mem[key] = func(original, compare_to)
 108        return copy.deepcopy(Comparison._memoizer_mem[key])
 109
 110    return memoizer
 111
 112def _memoize_pitches_lev_diff(func):
 113    def memoizer(original, compare_to, noteNode1, noteNode2, ids):
 114        key = (
 115            repr(original)
 116            + repr(compare_to)
 117            + repr(noteNode1)
 118            + repr(noteNode2)
 119            + repr(ids)
 120        )
 121        if key not in Comparison._memoizer_mem:
 122            Comparison._memoizer_mem[key] = func(original, compare_to, noteNode1, noteNode2, ids)
 123        return copy.deepcopy(Comparison._memoizer_mem[key])
 124
 125    return memoizer
 126
 127def _memoize_beamtuplet_lev_diff(func):
 128    def memoizer(original, compare_to, noteNode1, noteNode2, which):
 129        key = (
 130            repr(original) + repr(compare_to) + repr(noteNode1) + repr(noteNode2) + which
 131        )
 132        if key not in Comparison._memoizer_mem:
 133            Comparison._memoizer_mem[key] = func(original, compare_to, noteNode1, noteNode2, which)
 134        return copy.deepcopy(Comparison._memoizer_mem[key])
 135
 136    return memoizer
 137
 138def _memoize_generic_lev_diff(func):
 139    def memoizer(original, compare_to, noteNode1, noteNode2, which):
 140        key = (
 141            repr(original) + repr(compare_to) + repr(noteNode1) + repr(noteNode2) + which
 142        )
 143        if key not in Comparison._memoizer_mem:
 144            Comparison._memoizer_mem[key] = func(original, compare_to, noteNode1, noteNode2, which)
 145        return copy.deepcopy(Comparison._memoizer_mem[key])
 146
 147    return memoizer
 148
 149class Comparison:
 150    _memoizer_mem: dict = {}
 151
 152    @staticmethod
 153    def _clear_memoizer_caches():
 154        Comparison._memoizer_mem = {}
 155
 156    @staticmethod
 157    def _myers_diff(a_lines, b_lines):
 158        # Myers algorithm for LCS of bars (instead of the recursive algorithm in section 3.2)
 159        # This marks the farthest-right point along each diagonal in the edit
 160        # graph, along with the history that got it there
 161        Frontier = namedtuple("Frontier", ["x", "history"])
 162        frontier = {1: Frontier(0, [])}
 163
 164        a_max = len(a_lines)
 165        b_max = len(b_lines)
 166        for d in range(0, a_max + b_max + 1):
 167            for k in range(-d, d + 1, 2):
 168                # This determines whether our next search point will be going down
 169                # in the edit graph, or to the right.
 170                #
 171                # The intuition for this is that we should go down if we're on the
 172                # left edge (k == -d) to make sure that the left edge is fully
 173                # explored.
 174                #
 175                # If we aren't on the top (k != d), then only go down if going down
 176                # would take us to territory that hasn't sufficiently been explored
 177                # yet.
 178                go_down = k == -d or (k != d and frontier[k - 1].x < frontier[k + 1].x)
 179
 180                # Figure out the starting point of this iteration. The diagonal
 181                # offsets come from the geometry of the edit grid - if you're going
 182                # down, your diagonal is lower, and if you're going right, your
 183                # diagonal is higher.
 184                if go_down:
 185                    old_x, history = frontier[k + 1]
 186                    x = old_x
 187                else:
 188                    old_x, history = frontier[k - 1]
 189                    x = old_x + 1
 190
 191                # We want to avoid modifying the old history, since some other step
 192                # may decide to use it.
 193                history = history[:]
 194                y = x - k
 195
 196                # We start at the invalid point (0, 0) - we should only start building
 197                # up history when we move off of it.
 198                if 1 <= y <= b_max and go_down:
 199                    history.append((1, b_lines[y - 1][1]))  # add comparetostep
 200                elif 1 <= x <= a_max:
 201                    history.append((0, a_lines[x - 1][1]))  # add originalstep
 202
 203                # Chew up as many diagonal moves as we can - these correspond to common lines,
 204                # and they're considered "free" by the algorithm because we want to maximize
 205                # the number of these in the output.
 206                while x < a_max and y < b_max and a_lines[x][0] == b_lines[y][0]:
 207                    x += 1
 208                    y += 1
 209                    history.append((2, a_lines[x - 1][1]))  # add equal step
 210
 211                if x >= a_max and y >= b_max:
 212                    # If we're here, then we've traversed through the bottom-left corner,
 213                    # and are done.
 214                    return np.array(history)
 215
 216                frontier[k] = Frontier(x, history)
 217
 218        assert False, "Could not find edit script"
 219
 220    @staticmethod
 221    def _non_common_subsequences_myers(original, compare_to):
 222        # Both original and compare_to are list of lists, or numpy arrays with 2 columns.
 223        # This is necessary because bars need two representation at the same time.
 224        # One without the id (for comparison), and one with the id (to retrieve the bar
 225        # at the end).
 226
 227        # get the list of operations
 228        op_list = Comparison._myers_diff(
 229            np.array(original, dtype=np.int64), np.array(compare_to, dtype=np.int64)
 230        )[::-1]
 231        # retrieve the non common subsequences
 232        non_common_subsequences = []
 233        non_common_subsequences.append({"original": [], "compare_to": []})
 234        ind = 0
 235        for op in op_list[::-1]:
 236            if op[0] == 2:  # equal
 237                non_common_subsequences.append({"original": [], "compare_to": []})
 238                ind += 1
 239            elif op[0] == 0:  # original step
 240                non_common_subsequences[ind]["original"].append(op[1])
 241            elif op[0] == 1:  # compare to step
 242                non_common_subsequences[ind]["compare_to"].append(op[1])
 243        # remove the empty dict from the list
 244        non_common_subsequences = [
 245            s for s in non_common_subsequences if s != {"original": [], "compare_to": []}
 246        ]
 247        return non_common_subsequences
 248
 249    @staticmethod
 250    def _non_common_subsequences_of_measures(original_m, compare_to_m):
 251        # Take the hash for each measure to run faster comparison
 252        # We need two hashes: one that is independent of the IDs (precomputed_str, for comparison),
 253        # and one that contains the IDs (precomputed_repr, to retrieve the correct measure after
 254        # computation)
 255        original_int = [[o.precomputed_str, o.precomputed_repr] for o in original_m]
 256        compare_to_int = [[c.precomputed_str, c.precomputed_repr] for c in compare_to_m]
 257        ncs = Comparison._non_common_subsequences_myers(original_int, compare_to_int)
 258        # retrieve the original pointers to measures
 259        new_out = []
 260        for e in ncs:
 261            new_out.append({})
 262            for k in e.keys():
 263                new_out[-1][k] = []
 264                for repr_hash in e[k]:
 265                    if k == "original":
 266                        new_out[-1][k].append(
 267                            next(m for m in original_m if m.precomputed_repr == repr_hash)
 268                        )
 269                    else:
 270                        new_out[-1][k].append(
 271                            next(m for m in compare_to_m if m.precomputed_repr == repr_hash)
 272                        )
 273
 274        return new_out
 275
 276    @staticmethod
 277    @_memoize_pitches_lev_diff
 278    def _pitches_levenshtein_diff(
 279        original: list[tuple[str, str, bool]],
 280        compare_to: list[tuple[str, str, bool]],
 281        noteNode1: AnnNote,
 282        noteNode2: AnnNote,
 283        ids: tuple[int, int]
 284    ):
 285        """
 286        Compute the levenshtein distance between two sequences of pitches.
 287        Arguments:
 288            original {list} -- list of pitches
 289            compare_to {list} -- list of pitches
 290            noteNode1 {annotatedNote} --for referencing
 291            noteNode2 {annotatedNote} --for referencing
 292            ids {tuple} -- a tuple of 2 elements with the indices of the notes considered
 293        """
 294        if len(original) == 0 and len(compare_to) == 0:
 295            return [], 0
 296
 297        if len(original) == 0:
 298            op_list, cost = Comparison._pitches_levenshtein_diff(
 299                original, compare_to[1:], noteNode1, noteNode2, (ids[0], ids[1] + 1)
 300            )
 301            op_list.append(
 302                ("inspitch", noteNode1, noteNode2, M21Utils.pitch_size(compare_to[0]), ids)
 303            )
 304            cost += M21Utils.pitch_size(compare_to[0])
 305            return op_list, cost
 306
 307        if len(compare_to) == 0:
 308            op_list, cost = Comparison._pitches_levenshtein_diff(
 309                original[1:], compare_to, noteNode1, noteNode2, (ids[0] + 1, ids[1])
 310            )
 311            op_list.append(
 312                ("delpitch", noteNode1, noteNode2, M21Utils.pitch_size(original[0]), ids)
 313            )
 314            cost += M21Utils.pitch_size(original[0])
 315            return op_list, cost
 316
 317        # compute the cost and the op_list for the many possibilities of recursion
 318        cost_dict = {}
 319        op_list_dict = {}
 320        # del-pitch
 321        op_list_dict["delpitch"], cost_dict["delpitch"] = Comparison._pitches_levenshtein_diff(
 322            original[1:], compare_to, noteNode1, noteNode2, (ids[0] + 1, ids[1])
 323        )
 324        cost_dict["delpitch"] += M21Utils.pitch_size(original[0])
 325        op_list_dict["delpitch"].append(
 326            ("delpitch", noteNode1, noteNode2, M21Utils.pitch_size(original[0]), ids)
 327        )
 328        # ins-pitch
 329        op_list_dict["inspitch"], cost_dict["inspitch"] = Comparison._pitches_levenshtein_diff(
 330            original, compare_to[1:], noteNode1, noteNode2, (ids[0], ids[1] + 1)
 331        )
 332        cost_dict["inspitch"] += M21Utils.pitch_size(compare_to[0])
 333        op_list_dict["inspitch"].append(
 334            ("inspitch", noteNode1, noteNode2, M21Utils.pitch_size(compare_to[0]), ids)
 335        )
 336        # edit-pitch
 337        op_list_dict["editpitch"], cost_dict["editpitch"] = Comparison._pitches_levenshtein_diff(
 338            original[1:], compare_to[1:], noteNode1, noteNode2, (ids[0] + 1, ids[1] + 1)
 339        )
 340        if original[0] == compare_to[0]:  # to avoid perform the pitch_diff
 341            pitch_diff_op_list = []
 342            pitch_diff_cost = 0
 343        else:
 344            pitch_diff_op_list, pitch_diff_cost = Comparison._pitches_diff(
 345                original[0], compare_to[0], noteNode1, noteNode2, (ids[0], ids[1])
 346            )
 347        cost_dict["editpitch"] += pitch_diff_cost
 348        op_list_dict["editpitch"].extend(pitch_diff_op_list)
 349        # compute the minimum of the possibilities
 350        min_key = min(cost_dict, key=lambda k: cost_dict[k])
 351        out = op_list_dict[min_key], cost_dict[min_key]
 352        return out
 353
 354    @staticmethod
 355    def _pitches_diff(pitch1, pitch2, noteNode1, noteNode2, ids):
 356        """
 357        Compute the differences between two pitch (definition from the paper).
 358        a pitch consist of a tuple: pitch name (letter+number), accidental, tie.
 359        param : pitch1. The music_notation_repr tuple of note1
 360        param : pitch2. The music_notation_repr tuple of note2
 361        param : noteNode1. The noteNode where pitch1 belongs
 362        param : noteNode2. The noteNode where pitch2 belongs
 363        param : ids. (id_from_note1,id_from_note2) The indices of the notes in case of a chord
 364        Returns:
 365            [list] -- the list of differences
 366            [int] -- the cost of diff
 367        """
 368        cost = 0
 369        op_list = []
 370        # add for pitch name differences
 371        if pitch1[0] != pitch2[0]:
 372            cost += 1
 373            # TODO: select the note in a more precise way in case of a chord
 374            # rest to note
 375            if (pitch1[0][0] == "R") != (pitch2[0][0] == "R"):  # xor
 376                op_list.append(("pitchtypeedit", noteNode1, noteNode2, 1, ids))
 377            else:  # they are two notes
 378                op_list.append(("pitchnameedit", noteNode1, noteNode2, 1, ids))
 379
 380        # add for the accidentals
 381        if pitch1[1] != pitch2[1]:  # if the accidental is different
 382            if pitch1[1] == "None":
 383                assert pitch2[1] != "None"
 384                cost += 1
 385                op_list.append(("accidentins", noteNode1, noteNode2, 1, ids))
 386            elif pitch2[1] == "None":
 387                assert pitch1[1] != "None"
 388                cost += 1
 389                op_list.append(("accidentdel", noteNode1, noteNode2, 1, ids))
 390            else:  # a different type of alteration is present
 391                cost += 2  # delete then add
 392                op_list.append(("accidentedit", noteNode1, noteNode2, 2, ids))
 393        # add for the ties
 394        if pitch1[2] != pitch2[2]:
 395            # exclusive or. Add if one is tied and not the other.
 396            # probably to revise for chords
 397            cost += 1
 398            if pitch1[2]:
 399                assert not pitch2[2]
 400                op_list.append(("tiedel", noteNode1, noteNode2, 1, ids))
 401            elif pitch2[2]:
 402                assert not pitch1[2]
 403                op_list.append(("tieins", noteNode1, noteNode2, 1, ids))
 404        return op_list, cost
 405
 406    @staticmethod
 407    @_memoize_block_diff_lin
 408    def _block_diff_lin(original, compare_to):
 409        if len(original) == 0 and len(compare_to) == 0:
 410            return [], 0
 411
 412        if len(original) == 0:
 413            op_list, cost = Comparison._block_diff_lin(original, compare_to[1:])
 414            cost += compare_to[0].notation_size()
 415            op_list.append(("insbar", None, compare_to[0], compare_to[0].notation_size()))
 416            return op_list, cost
 417
 418        if len(compare_to) == 0:
 419            op_list, cost = Comparison._block_diff_lin(original[1:], compare_to)
 420            cost += original[0].notation_size()
 421            op_list.append(("delbar", original[0], None, original[0].notation_size()))
 422            return op_list, cost
 423
 424        # compute the cost and the op_list for the many possibilities of recursion
 425        cost_dict = {}
 426        op_list_dict = {}
 427        # del-bar
 428        op_list_dict["delbar"], cost_dict["delbar"] = Comparison._block_diff_lin(
 429            original[1:], compare_to
 430        )
 431        cost_dict["delbar"] += original[0].notation_size()
 432        op_list_dict["delbar"].append(
 433            ("delbar", original[0], None, original[0].notation_size())
 434        )
 435        # ins-bar
 436        op_list_dict["insbar"], cost_dict["insbar"] = Comparison._block_diff_lin(
 437            original, compare_to[1:]
 438        )
 439        cost_dict["insbar"] += compare_to[0].notation_size()
 440        op_list_dict["insbar"].append(
 441            ("insbar", None, compare_to[0], compare_to[0].notation_size())
 442        )
 443        # edit-bar
 444        op_list_dict["editbar"], cost_dict["editbar"] = Comparison._block_diff_lin(
 445            original[1:], compare_to[1:]
 446        )
 447        if (
 448            original[0] == compare_to[0]
 449        ):  # to avoid performing the _voices_coupling_recursive/_notes_set_distance
 450            # if it's not needed
 451            inside_bar_op_list = []
 452            inside_bar_cost = 0
 453        else:
 454            # diff the bar extras (like _notes_set_distance, but with lists of AnnExtras
 455            # instead of lists of AnnNotes)
 456            extras_op_list, extras_cost = Comparison._extras_set_distance(
 457                original[0].extras_list, compare_to[0].extras_list
 458            )
 459
 460            # diff the bar lyrics (with lists of AnnLyrics instead of lists of AnnExtras)
 461            lyrics_op_list, lyrics_cost = Comparison._lyrics_diff_lin(
 462                original[0].lyrics_list, compare_to[0].lyrics_list
 463            )
 464
 465            if original[0].includes_voicing:
 466                # run the voice coupling algorithm, and add to inside_bar_op_list
 467                # and inside_bar_cost
 468                inside_bar_op_list, inside_bar_cost = (
 469                    Comparison._voices_coupling_recursive(
 470                        original[0].voices_list, compare_to[0].voices_list
 471                    )
 472                )
 473            else:
 474                # run the set distance algorithm, and add to inside_bar_op_list
 475                # and inside_bar_cost
 476                inside_bar_op_list, inside_bar_cost = Comparison._notes_set_distance(
 477                    original[0].annot_notes, compare_to[0].annot_notes
 478                )
 479
 480            inside_bar_op_list.extend(extras_op_list)
 481            inside_bar_cost += extras_cost
 482            inside_bar_op_list.extend(lyrics_op_list)
 483            inside_bar_cost += lyrics_cost
 484
 485        cost_dict["editbar"] += inside_bar_cost
 486        op_list_dict["editbar"].extend(inside_bar_op_list)
 487        # compute the minimum of the possibilities
 488        min_key = min(cost_dict, key=lambda k: cost_dict[k])
 489        out = op_list_dict[min_key], cost_dict[min_key]
 490        return out
 491
 492    @staticmethod
 493    @_memoize_lyrics_diff_lin
 494    def _lyrics_diff_lin(original, compare_to):
 495        # original and compare to are two lists of AnnLyric
 496        if len(original) == 0 and len(compare_to) == 0:
 497            return [], 0
 498
 499        if len(original) == 0:
 500            cost = 0
 501            op_list, cost = Comparison._lyrics_diff_lin(original, compare_to[1:])
 502            op_list.append(("lyricins", None, compare_to[0], compare_to[0].notation_size()))
 503            cost += compare_to[0].notation_size()
 504            return op_list, cost
 505
 506        if len(compare_to) == 0:
 507            cost = 0
 508            op_list, cost = Comparison._lyrics_diff_lin(original[1:], compare_to)
 509            op_list.append(("lyricdel", original[0], None, original[0].notation_size()))
 510            cost += original[0].notation_size()
 511            return op_list, cost
 512
 513        # compute the cost and the op_list for the many possibilities of recursion
 514        cost = {}
 515        op_list = {}
 516        # lyricdel
 517        op_list["lyricdel"], cost["lyricdel"] = Comparison._lyrics_diff_lin(
 518            original[1:], compare_to
 519        )
 520        cost["lyricdel"] += original[0].notation_size()
 521        op_list["lyricdel"].append(
 522            ("lyricdel", original[0], None, original[0].notation_size())
 523        )
 524        # lyricins
 525        op_list["lyricins"], cost["lyricins"] = Comparison._lyrics_diff_lin(
 526            original, compare_to[1:]
 527        )
 528        cost["lyricins"] += compare_to[0].notation_size()
 529        op_list["lyricins"].append(
 530            ("lyricins", None, compare_to[0], compare_to[0].notation_size())
 531        )
 532        # lyricsub
 533        op_list["lyricsub"], cost["lyricsub"] = Comparison._lyrics_diff_lin(
 534            original[1:], compare_to[1:]
 535        )
 536        if (
 537            original[0] == compare_to[0]
 538        ):  # avoid call another function if they are equal
 539            lyricsub_op, lyricsub_cost = [], 0
 540        else:
 541            lyricsub_op, lyricsub_cost = (
 542                Comparison._annotated_lyric_diff(original[0], compare_to[0])
 543            )
 544        cost["lyricsub"] += lyricsub_cost
 545        op_list["lyricsub"].extend(lyricsub_op)
 546        # compute the minimum of the possibilities
 547        min_key = min(cost, key=cost.get)
 548        out = op_list[min_key], cost[min_key]
 549        return out
 550
 551    @staticmethod
 552    def _strings_levenshtein_distance(str1: str, str2: str):
 553        counter: dict = {"+": 0, "-": 0}
 554        distance: int = 0
 555        for edit_code in ndiff(str1, str2):
 556            if edit_code[0] == " ":
 557                distance += max(counter.values())
 558                counter = {"+": 0, "-": 0}
 559            else:
 560                counter[edit_code[0]] += 1
 561        distance += max(counter.values())
 562        return distance
 563
 564    @staticmethod
 565    def _areDifferentEnough(off1: OffsetQL | None, off2: OffsetQL | None) -> bool:
 566        if off1 == off2:
 567            return False
 568
 569        # this should never happen, but...
 570        if off1 is None or off2 is None:
 571            if off1 is None and off2 is not None:
 572                return True
 573            if off1 is not None and off2 is None:
 574                return True
 575            return False  # both are None, therefore not different at all
 576
 577        diff: OffsetQL = off1 - off2
 578        if diff < 0:
 579            diff = -diff
 580
 581        if diff > 0.0001:
 582            return True
 583        return False
 584
 585    @staticmethod
 586    def _annotated_extra_diff(annExtra1: AnnExtra, annExtra2: AnnExtra):
 587        """
 588        Compute the differences between two annotated extras.
 589        Each annotated extra consists of three values: content, offset, and duration
 590        """
 591        cost = 0
 592        op_list = []
 593
 594        # add for the content
 595        if annExtra1.content != annExtra2.content:
 596            content_cost: int = (
 597                Comparison._strings_levenshtein_distance(
 598                    annExtra1.content or '',
 599                    annExtra2.content or ''
 600                )
 601            )
 602            cost += content_cost
 603            op_list.append(("extracontentedit", annExtra1, annExtra2, content_cost))
 604
 605        # add for the symbolic (cost 2: delete one symbol, add the other)
 606        if annExtra1.symbolic != annExtra2.symbolic:
 607            cost += 2
 608            op_list.append(("extrasymboledit", annExtra1, annExtra2, 2))
 609
 610        # add for the infodict
 611        if annExtra1.infodict != annExtra2.infodict:
 612            info_cost: int = 0
 613            # handle everything in annExtra1 (whether or not it is in annExtra2)
 614            for k, v in annExtra1.infodict.items():
 615                if k not in annExtra2.infodict:
 616                    # not in annExtra2: delete a symbol
 617                    info_cost += 1
 618                elif v != annExtra2.infodict[k]:
 619                    # different in annExtra2: delete a symbol, add a symbol
 620                    info_cost += 2
 621            # handle everything in annExtra2 that is not in annExtra1
 622            for k in annExtra2.infodict:
 623                if k not in annExtra1.infodict:
 624                    # add a symbol
 625                    info_cost += 1
 626            cost += info_cost
 627            op_list.append(("extrainfoedit", annExtra1, annExtra2, info_cost))
 628
 629        # add for the offset
 630        # Note: offset here is a float, and some file formats have only four
 631        # decimal places of precision.  So we should not compare exactly here.
 632        if Comparison._areDifferentEnough(annExtra1.offset, annExtra2.offset):
 633            cost += 1
 634            op_list.append(("extraoffsetedit", annExtra1, annExtra2, 1))
 635
 636        # add for the duration
 637        # Note: duration here is a float, and some file formats have only four
 638        # decimal places of precision.  So we should not compare exactly here.
 639        if Comparison._areDifferentEnough(annExtra1.duration, annExtra2.duration):
 640            cost += 1
 641            op_list.append(("extradurationedit", annExtra1, annExtra2, 1))
 642
 643        # add for the style
 644        if annExtra1.styledict != annExtra2.styledict:
 645            cost += 1  # someday we might count different items in the styledict
 646            op_list.append(("extrastyleedit", annExtra1, annExtra2, 1))
 647
 648        return op_list, cost
 649
 650    @staticmethod
 651    def _annotated_lyric_diff(annLyric1: AnnLyric, annLyric2: AnnLyric):
 652        """
 653        Compute the differences between two annotated lyrics.
 654        Each annotated lyric consists of five values: lyric, verse_id, offset, duration,
 655        and styledict.
 656        """
 657        cost = 0
 658        op_list = []
 659
 660        # add for the content
 661        if annLyric1.lyric != annLyric2.lyric:
 662            content_cost: int = (
 663                Comparison._strings_levenshtein_distance(annLyric1.lyric, annLyric2.lyric)
 664            )
 665            cost += content_cost
 666            op_list.append(("lyricedit", annLyric1, annLyric2, content_cost))
 667
 668        # add for the number
 669        if annLyric1.number != annLyric2.number:
 670            number_cost: int
 671            if annLyric1.number == 0 or annLyric2.number == 0:
 672                # add or delete number
 673                number_cost = 1
 674            else:
 675                # add and delete number
 676                number_cost = 2
 677            cost += number_cost
 678            op_list.append(("lyricnumedit", annLyric1, annLyric2, number_cost))
 679
 680        # add for the identifier
 681        if annLyric1.identifier != annLyric2.identifier:
 682            # someday we might do a Levenshtein distance of the two ids
 683            id_cost: int
 684            if not annLyric1.identifier or not annLyric1.identifier:
 685                # add or delete identifier
 686                id_cost = 1
 687            else:
 688                # add and delete identifier
 689                id_cost = 2
 690            cost += id_cost
 691            op_list.append(("lyricidedit", annLyric1, annLyric2, id_cost))
 692
 693        # add for the offset
 694        # Note: offset here is a float, and some file formats have only four
 695        # decimal places of precision.  So we should not compare exactly here.
 696        if Comparison._areDifferentEnough(annLyric1.offset, annLyric2.offset):
 697            cost += 1
 698            op_list.append(("lyricoffsetedit", annLyric1, annLyric2, 1))
 699
 700        # add for the style
 701        if annLyric1.styledict != annLyric2.styledict:
 702            cost += 1  # someday we might count different items in the styledict
 703            op_list.append(("lyricstyleedit", annLyric1, annLyric2, 1))
 704
 705        return op_list, cost
 706
 707    @staticmethod
 708    def _annotated_metadata_item_diff(
 709        annMetadataItem1: AnnMetadataItem,
 710        annMetadataItem2: AnnMetadataItem
 711    ):
 712        """
 713        Compute the differences between two annotated metadata items.
 714        Each annotated metadata item has two values: key: str, value: t.Any,
 715        """
 716        cost = 0
 717        op_list = []
 718
 719        # add for the key (this never adds anything because we don't compare
 720        # items that don't have the same key).
 721        if annMetadataItem1.key != annMetadataItem2.key:
 722            key_cost: int = (
 723                Comparison._strings_levenshtein_distance(
 724                    annMetadataItem1.key,
 725                    annMetadataItem2.key
 726                )
 727            )
 728            cost += key_cost
 729            op_list.append(("mditemkeyedit", annMetadataItem1, annMetadataItem2, key_cost))
 730
 731        # add for the value
 732        if annMetadataItem1.value != annMetadataItem2.value:
 733            value_cost: int = (
 734                Comparison._strings_levenshtein_distance(
 735                    str(annMetadataItem1.value),
 736                    str(annMetadataItem2.value)
 737                )
 738            )
 739            cost += value_cost
 740            op_list.append(
 741                ("mditemvalueedit", annMetadataItem1, annMetadataItem2, value_cost)
 742            )
 743
 744        return op_list, cost
 745
 746    @staticmethod
 747    def _annotated_staff_group_diff(annStaffGroup1: AnnStaffGroup, annStaffGroup2: AnnStaffGroup):
 748        """
 749        Compute the differences between two annotated staff groups.
 750        Each annotated staff group consists of five values: name, abbreviation,
 751        symbol, barTogether, part_indices.
 752        """
 753        cost = 0
 754        op_list = []
 755
 756        # add for the name
 757        if annStaffGroup1.name != annStaffGroup2.name:
 758            name_cost: int = (
 759                Comparison._strings_levenshtein_distance(
 760                    annStaffGroup1.name,
 761                    annStaffGroup2.name
 762                )
 763            )
 764            cost += name_cost
 765            op_list.append(("staffgrpnameedit", annStaffGroup1, annStaffGroup2, name_cost))
 766
 767        # add for the abbreviation
 768        if annStaffGroup1.abbreviation != annStaffGroup2.abbreviation:
 769            abbreviation_cost: int = (
 770                Comparison._strings_levenshtein_distance(
 771                    annStaffGroup1.abbreviation,
 772                    annStaffGroup2.abbreviation
 773                )
 774            )
 775            cost += abbreviation_cost
 776            op_list.append((
 777                "staffgrpabbreviationedit",
 778                annStaffGroup1,
 779                annStaffGroup2,
 780                abbreviation_cost
 781            ))
 782
 783        # add for the symbol
 784        if annStaffGroup1.symbol != annStaffGroup2.symbol:
 785            symbol_cost: int
 786            if not annStaffGroup1.symbol or not annStaffGroup2.symbol:
 787                # add or delete symbol
 788                symbol_cost = 1
 789            else:
 790                # add and delete symbol
 791                symbol_cost = 2
 792            cost += symbol_cost
 793            op_list.append(
 794                ("staffgrpsymboledit", annStaffGroup1, annStaffGroup2, symbol_cost)
 795            )
 796
 797        # add for barTogether
 798        if annStaffGroup1.barTogether != annStaffGroup2.barTogether:
 799            barTogether_cost: int = 1
 800            cost += barTogether_cost
 801            op_list.append(
 802                ("staffgrpbartogetheredit", annStaffGroup1, annStaffGroup2, barTogether_cost)
 803            )
 804
 805        # add for partIndices (sorted list of int)
 806        if annStaffGroup1.part_indices != annStaffGroup2.part_indices:
 807            partIndices_cost: int = 0
 808            if annStaffGroup1.part_indices[0] != annStaffGroup2.part_indices[0]:
 809                partIndices_cost += 1  # vertical start
 810            if annStaffGroup1.part_indices[-1] != annStaffGroup2.part_indices[-1]:
 811                partIndices_cost += 1  # vertical height
 812            if partIndices_cost == 0:
 813                # should never get here, but we have to have a cost
 814                partIndices_cost = 1
 815            cost += partIndices_cost
 816            op_list.append(
 817                ("staffgrppartindicesedit", annStaffGroup1, annStaffGroup2, partIndices_cost)
 818            )
 819
 820        return op_list, cost
 821
 822    @staticmethod
 823    @_memoize_inside_bars_diff_lin
 824    def _inside_bars_diff_lin(original, compare_to):
 825        # original and compare to are two lists of annotatedNote
 826        if len(original) == 0 and len(compare_to) == 0:
 827            return [], 0
 828
 829        if len(original) == 0:
 830            cost = 0
 831            op_list, cost = Comparison._inside_bars_diff_lin(original, compare_to[1:])
 832            op_list.append(("noteins", None, compare_to[0], compare_to[0].notation_size()))
 833            cost += compare_to[0].notation_size()
 834            return op_list, cost
 835
 836        if len(compare_to) == 0:
 837            cost = 0
 838            op_list, cost = Comparison._inside_bars_diff_lin(original[1:], compare_to)
 839            op_list.append(("notedel", original[0], None, original[0].notation_size()))
 840            cost += original[0].notation_size()
 841            return op_list, cost
 842
 843        # compute the cost and the op_list for the many possibilities of recursion
 844        cost = {}
 845        op_list = {}
 846        # notedel
 847        op_list["notedel"], cost["notedel"] = Comparison._inside_bars_diff_lin(
 848            original[1:], compare_to
 849        )
 850        cost["notedel"] += original[0].notation_size()
 851        op_list["notedel"].append(
 852            ("notedel", original[0], None, original[0].notation_size())
 853        )
 854        # noteins
 855        op_list["noteins"], cost["noteins"] = Comparison._inside_bars_diff_lin(
 856            original, compare_to[1:]
 857        )
 858        cost["noteins"] += compare_to[0].notation_size()
 859        op_list["noteins"].append(
 860            ("noteins", None, compare_to[0], compare_to[0].notation_size())
 861        )
 862        # notesub
 863        op_list["notesub"], cost["notesub"] = Comparison._inside_bars_diff_lin(
 864            original[1:], compare_to[1:]
 865        )
 866        if (
 867            original[0] == compare_to[0]
 868        ):  # avoid call another function if they are equal
 869            notesub_op, notesub_cost = [], 0
 870        else:
 871            notesub_op, notesub_cost = Comparison._annotated_note_diff(original[0], compare_to[0])
 872        cost["notesub"] += notesub_cost
 873        op_list["notesub"].extend(notesub_op)
 874        # compute the minimum of the possibilities
 875        min_key = min(cost, key=cost.get)
 876        out = op_list[min_key], cost[min_key]
 877        return out
 878
 879    @staticmethod
 880    def _annotated_note_diff(annNote1: AnnNote, annNote2: AnnNote):
 881        """
 882        Compute the differences between two annotated notes.
 883        Each annotated note consist in a 5tuple (pitches, notehead, dots, beamings, tuplets)
 884        where pitches is a list.
 885        Arguments:
 886            noteNode1 {[AnnNote]} -- original AnnNote
 887            noteNode2 {[AnnNote]} -- compare_to AnnNote
 888        """
 889        cost = 0
 890        op_list = []
 891        # add for the pitches
 892        # if they are equal
 893        if annNote1.pitches == annNote2.pitches:
 894            op_list_pitch, cost_pitch = [], 0
 895        else:
 896            # pitches diff is computed using Levenshtein distances (they are already ordered)
 897            op_list_pitch, cost_pitch = Comparison._pitches_levenshtein_diff(
 898                annNote1.pitches, annNote2.pitches, annNote1, annNote2, (0, 0)
 899            )
 900        op_list.extend(op_list_pitch)
 901        cost += cost_pitch
 902        # add for the notehead
 903        if annNote1.note_head != annNote2.note_head:
 904            # delete one note head, add the other (this isn't noteshape, this is
 905            # just quarter-note note head vs half-note note head, etc)
 906            cost += 2
 907            op_list.append(("headedit", annNote1, annNote2, 2))
 908        # add for the dots
 909        if annNote1.dots != annNote2.dots:
 910            # add one for each added (or deleted) dot
 911            dots_diff = abs(annNote1.dots - annNote2.dots)
 912            cost += dots_diff
 913            if annNote1.dots > annNote2.dots:
 914                op_list.append(("dotdel", annNote1, annNote2, dots_diff))
 915            else:
 916                op_list.append(("dotins", annNote1, annNote2, dots_diff))
 917        if annNote1.graceType != annNote2.graceType:
 918            # accented vs unaccented vs not a grace note (delete the wrong, add the right)
 919            cost += 2
 920            op_list.append(("graceedit", annNote1, annNote2, 2))
 921        if annNote1.graceSlash != annNote2.graceSlash:
 922            # add or delete the slash
 923            cost += 1
 924            op_list.append(("graceslashedit", annNote1, annNote2, 1))
 925        # add for the beamings
 926        if annNote1.beamings != annNote2.beamings:
 927            beam_op_list, beam_cost = Comparison._beamtuplet_levenshtein_diff(
 928                annNote1.beamings, annNote2.beamings, annNote1, annNote2, "beam"
 929            )
 930            op_list.extend(beam_op_list)
 931            cost += beam_cost
 932        # add for the tuplet types
 933        if annNote1.tuplets != annNote2.tuplets:
 934            tuplet_op_list, tuplet_cost = Comparison._beamtuplet_levenshtein_diff(
 935                annNote1.tuplets, annNote2.tuplets, annNote1, annNote2, "tuplet"
 936            )
 937            op_list.extend(tuplet_op_list)
 938            cost += tuplet_cost
 939        # add for the tuplet info
 940        if annNote1.tuplet_info != annNote2.tuplet_info:
 941            tuplet_info_op_list, tuplet_info_cost = Comparison._beamtuplet_levenshtein_diff(
 942                annNote1.tuplet_info, annNote2.tuplet_info, annNote1, annNote2, "tuplet"
 943            )
 944            op_list.extend(tuplet_info_op_list)
 945            cost += tuplet_info_cost
 946        # add for the articulations
 947        if annNote1.articulations != annNote2.articulations:
 948            artic_op_list, artic_cost = Comparison._generic_levenshtein_diff(
 949                annNote1.articulations,
 950                annNote2.articulations,
 951                annNote1,
 952                annNote2,
 953                "articulation",
 954            )
 955            op_list.extend(artic_op_list)
 956            cost += artic_cost
 957        # add for the expressions
 958        if annNote1.expressions != annNote2.expressions:
 959            expr_op_list, expr_cost = Comparison._generic_levenshtein_diff(
 960                annNote1.expressions,
 961                annNote2.expressions,
 962                annNote1,
 963                annNote2,
 964                "expression",
 965            )
 966            op_list.extend(expr_op_list)
 967            cost += expr_cost
 968
 969        # add for gap from previous note or start of measure if first note in measure
 970        # (i.e. horizontal position shift)
 971        if annNote1.gap_dur != annNote2.gap_dur:
 972            # in all cases, the edit is a simple horizontal shift of the note
 973            cost += 1
 974            if annNote1.gap_dur == 0:
 975                op_list.append(("insspace", annNote1, annNote2, 1))
 976            elif annNote2.gap_dur == 0:
 977                op_list.append(("delspace", annNote1, annNote2, 1))
 978            else:
 979                # neither is zero
 980                op_list.append(("editspace", annNote1, annNote2, 1))
 981
 982        # add for noteshape
 983        if annNote1.noteshape != annNote2.noteshape:
 984            # always delete existing note shape and add the new one
 985            cost += 2
 986            op_list.append(("editnoteshape", annNote1, annNote2, 2))
 987        # add for noteheadFill
 988        if annNote1.noteheadFill != annNote2.noteheadFill:
 989            # always delete existing note fill and add the new one
 990            cost += 2
 991            op_list.append(("editnoteheadfill", annNote1, annNote2, 2))
 992        # add for noteheadParenthesis (True or False)
 993        if annNote1.noteheadParenthesis != annNote2.noteheadParenthesis:
 994            # always either add or delete parentheses
 995            cost += 1
 996            op_list.append(("editnoteheadparenthesis", annNote1, annNote2, 1))
 997        # add for stemDirection
 998        if annNote1.stemDirection != annNote2.stemDirection:
 999            stemdir_cost: int
1000            if annNote1.stemDirection == 'noStem' or annNote2.stemDirection == 'noStem':
1001                # gonna add a stem
1002                stemdir_cost = 1
1003            else:
1004                # gonna change a stem (add then delete)
1005                stemdir_cost = 2
1006            cost += stemdir_cost
1007            op_list.append(("editstemdirection", annNote1, annNote2, stemdir_cost))
1008        # add for the styledict
1009        if annNote1.styledict != annNote2.styledict:
1010            cost += 1
1011            op_list.append(("editstyle", annNote1, annNote2, 1))
1012
1013        return op_list, cost
1014
1015    @staticmethod
1016    @_memoize_beamtuplet_lev_diff
1017    def _beamtuplet_levenshtein_diff(original, compare_to, note1, note2, which):
1018        """
1019        Compute the levenshtein distance between two sequences of beaming or tuples.
1020        Arguments:
1021            original {list} -- list of strings (start, stop, continue or partial)
1022            compare_to {list} -- list of strings (start, stop, continue or partial)
1023            note1 {AnnNote} -- the note for referencing in the score
1024            note2 {AnnNote} -- the note for referencing in the score
1025            which -- a string: "beam" or "tuplet" depending what we are comparing
1026        """
1027        if which not in ("beam", "tuplet"):
1028            raise ValueError("Argument 'which' must be either 'beam' or 'tuplet'")
1029
1030        if len(original) == 0 and len(compare_to) == 0:
1031            return [], 0
1032
1033        if len(original) == 0:
1034            op_list, cost = Comparison._beamtuplet_levenshtein_diff(
1035                original, compare_to[1:], note1, note2, which
1036            )
1037            op_list.append(("ins" + which, note1, note2, 1))
1038            cost += 1
1039            return op_list, cost
1040
1041        if len(compare_to) == 0:
1042            op_list, cost = Comparison._beamtuplet_levenshtein_diff(
1043                original[1:], compare_to, note1, note2, which
1044            )
1045            op_list.append(("del" + which, note1, note2, 1))
1046            cost += 1
1047            return op_list, cost
1048
1049        # compute the cost and the op_list for the many possibilities of recursion
1050        cost = {}
1051        op_list = {}
1052        # delwhich
1053        op_list["del" + which], cost["del" + which] = Comparison._beamtuplet_levenshtein_diff(
1054            original[1:], compare_to, note1, note2, which
1055        )
1056        cost["del" + which] += 1
1057        op_list["del" + which].append(("del" + which, note1, note2, 1))
1058        # inswhich
1059        op_list["ins" + which], cost["ins" + which] = Comparison._beamtuplet_levenshtein_diff(
1060            original, compare_to[1:], note1, note2, which
1061        )
1062        cost["ins" + which] += 1
1063        op_list["ins" + which].append(("ins" + which, note1, note2, 1))
1064        # editwhich
1065        op_list["edit" + which], cost["edit" + which] = Comparison._beamtuplet_levenshtein_diff(
1066            original[1:], compare_to[1:], note1, note2, which
1067        )
1068        if original[0] == compare_to[0]:
1069            beam_diff_op_list = []
1070            beam_diff_cost = 0
1071        else:
1072            beam_diff_op_list, beam_diff_cost = [("edit" + which, note1, note2, 1)], 1
1073        cost["edit" + which] += beam_diff_cost
1074        op_list["edit" + which].extend(beam_diff_op_list)
1075        # compute the minimum of the possibilities
1076        min_key = min(cost, key=cost.get)
1077        out = op_list[min_key], cost[min_key]
1078        return out
1079
1080    @staticmethod
1081    @_memoize_generic_lev_diff
1082    def _generic_levenshtein_diff(original, compare_to, note1, note2, which):
1083        """
1084        Compute the Levenshtein distance between two generic sequences of symbols
1085        (e.g., articulations).
1086        Arguments:
1087            original {list} -- list of strings
1088            compare_to {list} -- list of strings
1089            note1 {AnnNote} -- the note for referencing in the score
1090            note2 {AnnNote} -- the note for referencing in the score
1091            which -- a string: e.g. "articulation" depending what we are comparing
1092        """
1093        if len(original) == 0 and len(compare_to) == 0:
1094            return [], 0
1095
1096        if len(original) == 0:
1097            op_list, cost = Comparison._generic_levenshtein_diff(
1098                original, compare_to[1:], note1, note2, which
1099            )
1100            op_list.append(("ins" + which, note1, note2, 1))
1101            cost += 1
1102            return op_list, cost
1103
1104        if len(compare_to) == 0:
1105            op_list, cost = Comparison._generic_levenshtein_diff(
1106                original[1:], compare_to, note1, note2, which
1107            )
1108            op_list.append(("del" + which, note1, note2, 1))
1109            cost += 1
1110            return op_list, cost
1111
1112        # compute the cost and the op_list for the many possibilities of recursion
1113        cost = {}
1114        op_list = {}
1115        # delwhich
1116        op_list["del" + which], cost["del" + which] = Comparison._generic_levenshtein_diff(
1117            original[1:], compare_to, note1, note2, which
1118        )
1119        cost["del" + which] += 1
1120        op_list["del" + which].append(("del" + which, note1, note2, 1))
1121        # inswhich
1122        op_list["ins" + which], cost["ins" + which] = Comparison._generic_levenshtein_diff(
1123            original, compare_to[1:], note1, note2, which
1124        )
1125        cost["ins" + which] += 1
1126        op_list["ins" + which].append(("ins" + which, note1, note2, 1))
1127        # editwhich
1128        op_list["edit" + which], cost["edit" + which] = Comparison._generic_levenshtein_diff(
1129            original[1:], compare_to[1:], note1, note2, which
1130        )
1131        if original[0] == compare_to[0]:  # to avoid perform the diff
1132            generic_diff_op_list = []
1133            generic_diff_cost = 0
1134        else:
1135            generic_diff_op_list, generic_diff_cost = (
1136                [("edit" + which, note1, note2, 1)],
1137                1,
1138            )
1139        cost["edit" + which] += generic_diff_cost
1140        op_list["edit" + which].extend(generic_diff_op_list)
1141        # compute the minimum of the possibilities
1142        min_key = min(cost, key=cost.get)
1143        out = op_list[min_key], cost[min_key]
1144        return out
1145
1146    @staticmethod
1147    @_memoize_notes_set_distance
1148    def _notes_set_distance(original: list[AnnNote], compare_to: list[AnnNote]):
1149        """
1150        Gather up pairs of matching notes (using pitch, offset, graceness, and visual duration, in
1151        that order of importance).  If you can't find an exactly matching note, try again without
1152        visual duration.
1153        original [list] -- a list of AnnNote (which are never chords)
1154        compare_to [list] -- a list of AnnNote (which are never chords)
1155        """
1156        paired_notes: list[tuple[AnnNote, AnnNote]] = []
1157        unpaired_orig_notes: list[AnnNote] = []
1158        unpaired_comp_notes: list[AnnNote] = copy.copy(compare_to)
1159
1160        for orig_n in original:
1161            fallback: AnnNote | None = None
1162            fallback_i: int = -1
1163            found_it: bool = False
1164            for i, comp_n in enumerate(unpaired_comp_notes):
1165                if orig_n.pitches[0][0] != comp_n.pitches[0][0]:
1166                    # this pitch comparison (1) assumes the note is not a chord
1167                    # (because we don't do chords when Voicing is not set, and
1168                    # we only call _notes_set_distance when Voicing is not set),
1169                    # and (2) only compares the visual position of the note (we
1170                    # are ignoring the accidental here).  This is so that an
1171                    # accidental change will show up as a pitch edit, not a
1172                    # note remove/insert.
1173                    continue
1174                if Comparison._areDifferentEnough(orig_n.note_offset, comp_n.note_offset):
1175                    continue
1176                if orig_n.note_is_grace != comp_n.note_is_grace:
1177                    continue
1178                if fallback is None:
1179                    fallback = comp_n
1180                    fallback_i = i
1181
1182                if orig_n.note_dur_type != comp_n.note_dur_type:
1183                    continue
1184                if orig_n.note_dur_dots != comp_n.note_dur_dots:
1185                    continue
1186
1187                # found a perfect match
1188                paired_notes.append((orig_n, comp_n))
1189
1190                # remove comp_n from unpaired_comp_notes
1191                unpaired_comp_notes.pop(i)  # remove(comp_n) would sometimes get the wrong one
1192
1193                found_it = True
1194                break
1195
1196            if found_it:
1197                # on to the next original note
1198                continue
1199
1200            # did we find a fallback (matched except for duration)?
1201            if fallback is not None:
1202                paired_notes.append((orig_n, fallback))
1203                unpaired_comp_notes.pop(fallback_i)
1204                continue
1205
1206            # we found nothing
1207            unpaired_orig_notes.append(orig_n)
1208
1209        # compute the cost and the op_list
1210        cost: int = 0
1211        op_list: list = []
1212
1213        # notedel
1214        if unpaired_orig_notes:
1215            for an in unpaired_orig_notes:
1216                cost += an.notation_size()
1217                op_list.append(("notedel", an, None, an.notation_size(), an.note_idx_in_chord))
1218
1219        # noteins
1220        if unpaired_comp_notes:
1221            for an in unpaired_comp_notes:
1222                cost += an.notation_size()
1223                op_list.append(("noteins", None, an, an.notation_size(), an.note_idx_in_chord))
1224
1225        # notesub
1226        if paired_notes:
1227            for ano, anc in paired_notes:
1228                if ano == anc:
1229                    # if equal, avoid _annotated_note_diff call
1230                    notesub_op, notesub_cost = [], 0
1231                else:
1232                    notesub_op, notesub_cost = (
1233                        Comparison._annotated_note_diff(ano, anc)
1234                    )
1235                cost += notesub_cost
1236                op_list.extend(notesub_op)
1237
1238        return op_list, cost
1239
1240    @staticmethod
1241    @_memoize_extras_set_distance
1242    def _extras_set_distance(original: list[AnnExtra], compare_to: list[AnnExtra]):
1243        """
1244        Gather up pairs of matching extras (using kind, offset, and visual duration, in
1245        that order of importance).  If you can't find an exactly matching extra, try again
1246        without visual duration.
1247        original [list] -- a list of AnnExtras
1248        compare_to [list] -- a list of AnnExtras
1249        """
1250        paired_extras: list[tuple[AnnExtra, AnnExtra]] = []
1251        unpaired_orig_extras: list[AnnExtra] = []
1252        unpaired_comp_extras: list[AnnExtra] = copy.copy(compare_to)
1253
1254        for orig_x in original:
1255            fallback: AnnExtra | None = None
1256            fallback_i: int = -1
1257            found_it: bool = False
1258            for i, comp_x in enumerate(unpaired_comp_extras):
1259                # kind and offset are required for pairing
1260                if orig_x.kind != comp_x.kind:
1261                    continue
1262                if Comparison._areDifferentEnough(orig_x.offset, comp_x.offset):
1263                    continue
1264                if fallback is None:
1265                    fallback = comp_x
1266                    fallback_i = i
1267
1268                # duration is preferred for pairing
1269                if Comparison._areDifferentEnough(orig_x.duration, comp_x.duration):
1270                    continue
1271
1272                # there are a few kind-specific elements that are also preferred:
1273                #   'direction'/'ending': content (visible text)
1274                #   'keysig'/'timesig'/'clef': symbolic (there are sometimes two
1275                #       simultaneous keysigs, timesigs, or clefs, and we don't
1276                #       want to confuse which one is which, producing a diff where
1277                #       there actually isn't one)
1278                #   'slur': placements (because there are often two identical slurs
1279                #       whose only difference is 'above' vs 'below', producing a diff
1280                #       where there actually isn't one)
1281                if orig_x.kind in ('direction', 'ending'):
1282                    if orig_x.content != comp_x.content:
1283                        continue
1284                if orig_x.kind == 'clef':
1285                    if orig_x.symbolic != comp_x.symbolic:
1286                        continue
1287                if orig_x.kind in ('keysig', 'timesig'):
1288                    if orig_x.infodict != comp_x.infodict:
1289                        continue
1290                if orig_x.kind == 'slur':
1291                    orig_placement: str = orig_x.styledict.get('placement', '')
1292                    comp_placement: str = comp_x.styledict.get('placement', '')
1293                    if orig_placement != comp_placement:
1294                        continue
1295
1296                # found a perfect match
1297                paired_extras.append((orig_x, comp_x))
1298
1299                # remove comp_n from unpaired_comp_extras
1300                unpaired_comp_extras.pop(i)  # remove(comp_n) would sometimes get the wrong one
1301
1302                found_it = True
1303                break
1304
1305            if found_it:
1306                # on to the next original extra
1307                continue
1308
1309            # did we find a fallback (matched except for duration)?
1310            if fallback is not None:
1311                paired_extras.append((orig_x, fallback))
1312                unpaired_comp_extras.pop(fallback_i)
1313                continue
1314
1315            # we found nothing
1316            unpaired_orig_extras.append(orig_x)
1317
1318        # compute the cost and the op_list
1319        cost: int = 0
1320        op_list: list = []
1321
1322        # extradel
1323        for extra in unpaired_orig_extras:
1324            cost += extra.notation_size()
1325            op_list.append(("extradel", extra, None, extra.notation_size()))
1326
1327        # extrains
1328        for extra in unpaired_comp_extras:
1329            cost += extra.notation_size()
1330            op_list.append(("extrains", None, extra, extra.notation_size()))
1331
1332        # extrasub
1333        if paired_extras:
1334            for extrao, extrac in paired_extras:
1335                if extrao == extrac:
1336                    # if equal, avoid _annotated_extra_diff call
1337                    extrasub_op, extrasub_cost = [], 0
1338                else:
1339                    extrasub_op, extrasub_cost = (
1340                        Comparison._annotated_extra_diff(extrao, extrac)
1341                    )
1342                cost += extrasub_cost
1343                op_list.extend(extrasub_op)
1344
1345        return op_list, cost
1346
1347    @staticmethod
1348    @_memoize_metadata_items_set_distance
1349    def _metadata_items_set_distance(
1350        original: list[AnnMetadataItem],
1351        compare_to: list[AnnMetadataItem]
1352    ):
1353        """
1354        Gather up pairs of matching metadata_items (using key and value).  If you can't find
1355        an exactly matching metadata_item, try again without value.
1356        original [list] -- a list of AnnMetadataItems
1357        compare_to [list] -- a list of AnnMetadataItems
1358        """
1359        paired_metadata_items: list[tuple[AnnMetadataItem, AnnMetadataItem]] = []
1360        unpaired_orig_metadata_items: list[AnnMetadataItem] = []
1361        unpaired_comp_metadata_items: list[AnnMetadataItem] = copy.copy(compare_to)
1362
1363        # first look for perfect matches
1364        for orig_mdi in original:
1365            found_it: bool = False
1366            for i, comp_mdi in enumerate(unpaired_comp_metadata_items):
1367                # key is required for perfect match
1368                if orig_mdi.key != comp_mdi.key:
1369                    continue
1370
1371                # value is required for perfect match
1372                if orig_mdi.value != comp_mdi.value:
1373                    continue
1374
1375                # found a perfect match
1376                paired_metadata_items.append((orig_mdi, comp_mdi))
1377
1378                # remove comp_mdi from unpaired_comp_metadata_items
1379                unpaired_comp_metadata_items.pop(i)
1380
1381                found_it = True
1382                break
1383
1384            if found_it:
1385                # on to the next original metadata_item
1386                continue
1387
1388            # we found no perfect match
1389            unpaired_orig_metadata_items.append(orig_mdi)
1390
1391        # now look among the unpaired remainders for key match only
1392        remove_unpaired_orig_items: list[AnnMetadataItem] = []
1393        for orig_mdi in unpaired_orig_metadata_items:
1394            for comp_idx, comp_mdi in enumerate(unpaired_comp_metadata_items):
1395                # key is required for key-only match
1396                if orig_mdi.key != comp_mdi.key:
1397                    continue
1398
1399                # found a key-only match
1400                paired_metadata_items.append((orig_mdi, comp_mdi))
1401
1402                # remove comp_mdi from unpaired_comp_metadata_items
1403                unpaired_comp_metadata_items.pop(comp_idx)
1404
1405                # make a note of unpaired_orig_metadata_item to remove later
1406                remove_unpaired_orig_items.append(orig_mdi)
1407                break
1408
1409        for remove_mdi in remove_unpaired_orig_items:
1410            unpaired_orig_metadata_items.remove(remove_mdi)
1411
1412        # compute the cost and the op_list
1413        cost: int = 0
1414        op_list: list = []
1415
1416        # mditemdel
1417        for metadata_item in unpaired_orig_metadata_items:
1418            cost += metadata_item.notation_size()
1419            op_list.append(("mditemdel", metadata_item, None, metadata_item.notation_size()))
1420
1421        # mditemins
1422        for metadata_item in unpaired_comp_metadata_items:
1423            cost += metadata_item.notation_size()
1424            op_list.append(("mditemins", None, metadata_item, metadata_item.notation_size()))
1425
1426        # mditemsub
1427        if paired_metadata_items:
1428            for metadata_itemo, metadata_itemc in paired_metadata_items:
1429                if metadata_itemo == metadata_itemc:
1430                    # if equal, avoid _annotated_metadata_item_diff call
1431                    metadata_itemsub_op, metadata_itemsub_cost = [], 0
1432                else:
1433                    metadata_itemsub_op, metadata_itemsub_cost = (
1434                        Comparison._annotated_metadata_item_diff(metadata_itemo, metadata_itemc)
1435                    )
1436                cost += metadata_itemsub_cost
1437                op_list.extend(metadata_itemsub_op)
1438
1439        return op_list, cost
1440
1441    @staticmethod
1442    @_memoize_staff_groups_set_distance
1443    def _staff_groups_set_distance(
1444        original: list[AnnStaffGroup],
1445        compare_to: list[AnnStaffGroup]
1446    ):
1447        """
1448        Gather up pairs of matching staffgroups (using start_index and end_index, in
1449        that order of importance).  If you can't find an exactly matching staffgroup,
1450        try again without end_index.
1451        original [list] -- a list of AnnStaffGroups
1452        compare_to [list] -- a list of AnnStaffGroups
1453        """
1454        paired_staff_groups: list[tuple[AnnStaffGroup, AnnStaffGroup]] = []
1455        unpaired_orig_staff_groups: list[AnnStaffGroup] = []
1456        unpaired_comp_staff_groups: list[AnnStaffGroup] = copy.copy(compare_to)
1457
1458        for orig_sg in original:
1459            fallback: AnnStaffGroup | None = None
1460            fallback_i: int = -1
1461            found_it: bool = False
1462            for i, comp_sg in enumerate(unpaired_comp_staff_groups):
1463                # start index is required for pairing
1464                if orig_sg.part_indices[0] != comp_sg.part_indices[0]:
1465                    continue
1466                if fallback is None:
1467                    fallback = comp_sg
1468                    fallback_i = i
1469
1470                # end index and symbol (brace, bracket, etc) is preferred for pairing
1471                if orig_sg.part_indices[-1] != comp_sg.part_indices[-1]:
1472                    continue
1473                if orig_sg.symbol != comp_sg.symbol:
1474                    continue
1475
1476                # found a perfect match
1477                paired_staff_groups.append((orig_sg, comp_sg))
1478
1479                # remove comp_n from unpaired_comp_staff_groups
1480                unpaired_comp_staff_groups.pop(i)
1481
1482                found_it = True
1483                break
1484
1485            if found_it:
1486                # on to the next original staff_group
1487                continue
1488
1489            # did we find a fallback (matched except for duration)?
1490            if fallback is not None:
1491                paired_staff_groups.append((orig_sg, fallback))
1492                unpaired_comp_staff_groups.pop(fallback_i)
1493                continue
1494
1495            # we found nothing
1496            unpaired_orig_staff_groups.append(orig_sg)
1497
1498        # compute the cost and the op_list
1499        cost: int = 0
1500        op_list: list = []
1501
1502        # staffgrpdel
1503        for staff_group in unpaired_orig_staff_groups:
1504            cost += staff_group.notation_size()
1505            op_list.append(("staffgrpdel", staff_group, None, staff_group.notation_size()))
1506
1507        # staffgrpins
1508        for staff_group in unpaired_comp_staff_groups:
1509            cost += staff_group.notation_size()
1510            op_list.append(("staffgrpins", None, staff_group, staff_group.notation_size()))
1511
1512        # staffgrpsub
1513        if paired_staff_groups:
1514            for staff_groupo, staff_groupc in paired_staff_groups:
1515                if staff_groupo == staff_groupc:
1516                    # if equal, avoid _annotated_staff_group_diff call
1517                    staff_groupsub_op, staff_groupsub_cost = [], 0
1518                else:
1519                    staff_groupsub_op, staff_groupsub_cost = (
1520                        Comparison._annotated_staff_group_diff(staff_groupo, staff_groupc)
1521                    )
1522                cost += staff_groupsub_cost
1523                op_list.extend(staff_groupsub_op)
1524
1525        return op_list, cost
1526
1527    @staticmethod
1528    def _voices_coupling_recursive(original: list[AnnVoice], compare_to: list[AnnVoice]):
1529        """
1530        Compare all the possible voices permutations, considering also deletion and
1531        insertion (equation on office lens).
1532        original [list] -- a list of Voice
1533        compare_to [list] -- a list of Voice
1534        """
1535        if len(original) == 0 and len(compare_to) == 0:  # stop the recursion
1536            return [], 0
1537
1538        if len(original) == 0:
1539            # insertion
1540            op_list, cost = Comparison._voices_coupling_recursive(original, compare_to[1:])
1541            # add for the inserted voice
1542            op_list.append(("voiceins", None, compare_to[0], compare_to[0].notation_size()))
1543            cost += compare_to[0].notation_size()
1544            return op_list, cost
1545
1546        if len(compare_to) == 0:
1547            # deletion
1548            op_list, cost = Comparison._voices_coupling_recursive(original[1:], compare_to)
1549            # add for the deleted voice
1550            op_list.append(("voicedel", original[0], None, original[0].notation_size()))
1551            cost += original[0].notation_size()
1552            return op_list, cost
1553
1554        cost = {}
1555        op_list = {}
1556        # deletion
1557        op_list["voicedel"], cost["voicedel"] = Comparison._voices_coupling_recursive(
1558            original[1:], compare_to
1559        )
1560        op_list["voicedel"].append(
1561            ("voicedel", original[0], None, original[0].notation_size())
1562        )
1563        cost["voicedel"] += original[0].notation_size()
1564        for i, c in enumerate(compare_to):
1565            # substitution
1566            (
1567                op_list["voicesub" + str(i)],
1568                cost["voicesub" + str(i)],
1569            ) = Comparison._voices_coupling_recursive(
1570                original[1:], compare_to[:i] + compare_to[i + 1:]
1571            )
1572            if (
1573                compare_to[0] != original[0]
1574            ):  # add the cost of the sub and the operations from inside_bar_diff
1575                op_list_inside_bar, cost_inside_bar = Comparison._inside_bars_diff_lin(
1576                    original[0].annot_notes, c.annot_notes
1577                )  # compute the distance from original[0] and compare_to[i]
1578                op_list["voicesub" + str(i)].extend(op_list_inside_bar)
1579                cost["voicesub" + str(i)] += cost_inside_bar
1580        # compute the minimum of the possibilities
1581        min_key = min(cost, key=cost.get)
1582        out = op_list[min_key], cost[min_key]
1583        return out
1584
1585    @staticmethod
1586    def annotated_scores_diff(score1: AnnScore, score2: AnnScore) -> tuple[list[tuple], int]:
1587        '''
1588        Compare two annotated scores, computing an operations list and the cost of applying those
1589        operations to the first score to generate the second score.
1590
1591        Args:
1592            score1 (`musicdiff.annotation.AnnScore`): The first annotated score to compare.
1593            score2 (`musicdiff.annotation.AnnScore`): The second annotated score to compare.
1594
1595        Returns:
1596            list[tuple], int: The operations list and the cost
1597        '''
1598        # Clear all memoizer caches, in case we are called again with different scores.
1599        # The cached results are no longer valid.
1600        Comparison._clear_memoizer_caches()
1601
1602        op_list_total: list[tuple] = []
1603        cost_total: int = 0
1604
1605        if score1.n_of_parts == score2.n_of_parts:
1606            n_of_parts = score1.n_of_parts
1607        else:
1608            # The two scores have differing number of parts.  For now, assume that
1609            # the parts are in the same order in both scores, and that the missing
1610            # parts are the ones that should have been at the end of the smaller
1611            # score. In future we could do something like we do with voices, where
1612            # we try all the combinations and compare the most-similar pairs of
1613            # parts, and the rest are considered to be the extra parts (deleted
1614            # from score1, or inserted into score2).
1615            n_of_parts = min(score1.n_of_parts, score2.n_of_parts)
1616            if score1.n_of_parts > score2.n_of_parts:
1617                # score1 has more parts that must be deleted
1618                for part_idx in range(score2.n_of_parts, score1.n_of_parts):
1619                    deleted_part = score1.part_list[part_idx]
1620                    op_list_total.append(
1621                        (
1622                            "delpart",
1623                            deleted_part,
1624                            None,
1625                            deleted_part.notation_size()
1626                        )
1627                    )
1628                    cost_total += deleted_part.notation_size()
1629            else:
1630                # score2 has more parts that must be inserted
1631                for part_idx in range(score1.n_of_parts, score2.n_of_parts):
1632                    inserted_part = score2.part_list[part_idx]
1633                    op_list_total.append(
1634                        (
1635                            "inspart",
1636                            None,
1637                            inserted_part,
1638                            inserted_part.notation_size()
1639                        )
1640                    )
1641                    cost_total += inserted_part.notation_size()
1642
1643        # iterate over parts that exist in both scores
1644        for p_number in range(n_of_parts):
1645            # compute non-common-subseq
1646            ncs = Comparison._non_common_subsequences_of_measures(
1647                score1.part_list[p_number].bar_list,
1648                score2.part_list[p_number].bar_list,
1649            )
1650            # compute blockdiff
1651            for subseq in ncs:
1652                op_list_block, cost_block = Comparison._block_diff_lin(
1653                    subseq["original"], subseq["compare_to"]
1654                )
1655                op_list_total.extend(op_list_block)
1656                cost_total += cost_block
1657
1658        # compare the staff groups
1659        groups_op_list, groups_cost = Comparison._staff_groups_set_distance(
1660            score1.staff_group_list, score2.staff_group_list
1661        )
1662        op_list_total.extend(groups_op_list)
1663        cost_total += groups_cost
1664
1665        # compare the metadata items
1666        mditems_op_list, mditems_cost = Comparison._metadata_items_set_distance(
1667            score1.metadata_items_list, score2.metadata_items_list
1668        )
1669        op_list_total.extend(mditems_op_list)
1670        cost_total += mditems_cost
1671
1672        # Add the cost of any syntax errors in score1 that were fixed during parsing.
1673        # Ignore enough syntax errors to keep OMR-NED <= 1.0, for consistency.
1674        total_syms: int = score1.notation_size() + score2.notation_size()
1675        cost_plus_errors: int = cost_total + score1.num_syntax_errors_fixed
1676        if cost_plus_errors > total_syms:
1677            adjustment: int = cost_plus_errors - total_syms
1678            score1.num_syntax_errors_fixed -= adjustment
1679
1680        cost_total += score1.num_syntax_errors_fixed
1681
1682        return op_list_total, cost_total

class EvaluationMetrics: View Source

30class EvaluationMetrics:
31    def __init__(
32        self,
33        gt_path: Path,
34        pred_path: Path,
35        gt_numsyms: int,
36        pred_numsyms: int,
37        omr_edit_distance: int,
38        edit_distances_dict: dict[str, int],
39        omr_ned: float
40    ):
41        self.gt_path: Path = gt_path
42        self.pred_path: Path = pred_path
43        self.gt_numsyms: int = gt_numsyms
44        self.pred_numsyms: int = pred_numsyms
45        self.omr_edit_distance: int = omr_edit_distance
46        self.edit_distances_dict: dict[str, int] = edit_distances_dict
47        self.omr_ned: float = omr_ned

EvaluationMetrics( gt_path: pathlib.Path, pred_path: pathlib.Path, gt_numsyms: int, pred_numsyms: int, omr_edit_distance: int, edit_distances_dict: dict[str, int], omr_ned: float) View Source

31    def __init__(
32        self,
33        gt_path: Path,
34        pred_path: Path,
35        gt_numsyms: int,
36        pred_numsyms: int,
37        omr_edit_distance: int,
38        edit_distances_dict: dict[str, int],
39        omr_ned: float
40    ):
41        self.gt_path: Path = gt_path
42        self.pred_path: Path = pred_path
43        self.gt_numsyms: int = gt_numsyms
44        self.pred_numsyms: int = pred_numsyms
45        self.omr_edit_distance: int = omr_edit_distance
46        self.edit_distances_dict: dict[str, int] = edit_distances_dict
47        self.omr_ned: float = omr_ned

gt_path: pathlib.Path

pred_path: pathlib.Path

gt_numsyms: int

pred_numsyms: int

omr_edit_distance: int

edit_distances_dict: dict[str, int]

omr_ned: float

class Comparison: View Source

 150class Comparison:
 151    _memoizer_mem: dict = {}
 152
 153    @staticmethod
 154    def _clear_memoizer_caches():
 155        Comparison._memoizer_mem = {}
 156
 157    @staticmethod
 158    def _myers_diff(a_lines, b_lines):
 159        # Myers algorithm for LCS of bars (instead of the recursive algorithm in section 3.2)
 160        # This marks the farthest-right point along each diagonal in the edit
 161        # graph, along with the history that got it there
 162        Frontier = namedtuple("Frontier", ["x", "history"])
 163        frontier = {1: Frontier(0, [])}
 164
 165        a_max = len(a_lines)
 166        b_max = len(b_lines)
 167        for d in range(0, a_max + b_max + 1):
 168            for k in range(-d, d + 1, 2):
 169                # This determines whether our next search point will be going down
 170                # in the edit graph, or to the right.
 171                #
 172                # The intuition for this is that we should go down if we're on the
 173                # left edge (k == -d) to make sure that the left edge is fully
 174                # explored.
 175                #
 176                # If we aren't on the top (k != d), then only go down if going down
 177                # would take us to territory that hasn't sufficiently been explored
 178                # yet.
 179                go_down = k == -d or (k != d and frontier[k - 1].x < frontier[k + 1].x)
 180
 181                # Figure out the starting point of this iteration. The diagonal
 182                # offsets come from the geometry of the edit grid - if you're going
 183                # down, your diagonal is lower, and if you're going right, your
 184                # diagonal is higher.
 185                if go_down:
 186                    old_x, history = frontier[k + 1]
 187                    x = old_x
 188                else:
 189                    old_x, history = frontier[k - 1]
 190                    x = old_x + 1
 191
 192                # We want to avoid modifying the old history, since some other step
 193                # may decide to use it.
 194                history = history[:]
 195                y = x - k
 196
 197                # We start at the invalid point (0, 0) - we should only start building
 198                # up history when we move off of it.
 199                if 1 <= y <= b_max and go_down:
 200                    history.append((1, b_lines[y - 1][1]))  # add comparetostep
 201                elif 1 <= x <= a_max:
 202                    history.append((0, a_lines[x - 1][1]))  # add originalstep
 203
 204                # Chew up as many diagonal moves as we can - these correspond to common lines,
 205                # and they're considered "free" by the algorithm because we want to maximize
 206                # the number of these in the output.
 207                while x < a_max and y < b_max and a_lines[x][0] == b_lines[y][0]:
 208                    x += 1
 209                    y += 1
 210                    history.append((2, a_lines[x - 1][1]))  # add equal step
 211
 212                if x >= a_max and y >= b_max:
 213                    # If we're here, then we've traversed through the bottom-left corner,
 214                    # and are done.
 215                    return np.array(history)
 216
 217                frontier[k] = Frontier(x, history)
 218
 219        assert False, "Could not find edit script"
 220
 221    @staticmethod
 222    def _non_common_subsequences_myers(original, compare_to):
 223        # Both original and compare_to are list of lists, or numpy arrays with 2 columns.
 224        # This is necessary because bars need two representation at the same time.
 225        # One without the id (for comparison), and one with the id (to retrieve the bar
 226        # at the end).
 227
 228        # get the list of operations
 229        op_list = Comparison._myers_diff(
 230            np.array(original, dtype=np.int64), np.array(compare_to, dtype=np.int64)
 231        )[::-1]
 232        # retrieve the non common subsequences
 233        non_common_subsequences = []
 234        non_common_subsequences.append({"original": [], "compare_to": []})
 235        ind = 0
 236        for op in op_list[::-1]:
 237            if op[0] == 2:  # equal
 238                non_common_subsequences.append({"original": [], "compare_to": []})
 239                ind += 1
 240            elif op[0] == 0:  # original step
 241                non_common_subsequences[ind]["original"].append(op[1])
 242            elif op[0] == 1:  # compare to step
 243                non_common_subsequences[ind]["compare_to"].append(op[1])
 244        # remove the empty dict from the list
 245        non_common_subsequences = [
 246            s for s in non_common_subsequences if s != {"original": [], "compare_to": []}
 247        ]
 248        return non_common_subsequences
 249
 250    @staticmethod
 251    def _non_common_subsequences_of_measures(original_m, compare_to_m):
 252        # Take the hash for each measure to run faster comparison
 253        # We need two hashes: one that is independent of the IDs (precomputed_str, for comparison),
 254        # and one that contains the IDs (precomputed_repr, to retrieve the correct measure after
 255        # computation)
 256        original_int = [[o.precomputed_str, o.precomputed_repr] for o in original_m]
 257        compare_to_int = [[c.precomputed_str, c.precomputed_repr] for c in compare_to_m]
 258        ncs = Comparison._non_common_subsequences_myers(original_int, compare_to_int)
 259        # retrieve the original pointers to measures
 260        new_out = []
 261        for e in ncs:
 262            new_out.append({})
 263            for k in e.keys():
 264                new_out[-1][k] = []
 265                for repr_hash in e[k]:
 266                    if k == "original":
 267                        new_out[-1][k].append(
 268                            next(m for m in original_m if m.precomputed_repr == repr_hash)
 269                        )
 270                    else:
 271                        new_out[-1][k].append(
 272                            next(m for m in compare_to_m if m.precomputed_repr == repr_hash)
 273                        )
 274
 275        return new_out
 276
 277    @staticmethod
 278    @_memoize_pitches_lev_diff
 279    def _pitches_levenshtein_diff(
 280        original: list[tuple[str, str, bool]],
 281        compare_to: list[tuple[str, str, bool]],
 282        noteNode1: AnnNote,
 283        noteNode2: AnnNote,
 284        ids: tuple[int, int]
 285    ):
 286        """
 287        Compute the levenshtein distance between two sequences of pitches.
 288        Arguments:
 289            original {list} -- list of pitches
 290            compare_to {list} -- list of pitches
 291            noteNode1 {annotatedNote} --for referencing
 292            noteNode2 {annotatedNote} --for referencing
 293            ids {tuple} -- a tuple of 2 elements with the indices of the notes considered
 294        """
 295        if len(original) == 0 and len(compare_to) == 0:
 296            return [], 0
 297
 298        if len(original) == 0:
 299            op_list, cost = Comparison._pitches_levenshtein_diff(
 300                original, compare_to[1:], noteNode1, noteNode2, (ids[0], ids[1] + 1)
 301            )
 302            op_list.append(
 303                ("inspitch", noteNode1, noteNode2, M21Utils.pitch_size(compare_to[0]), ids)
 304            )
 305            cost += M21Utils.pitch_size(compare_to[0])
 306            return op_list, cost
 307
 308        if len(compare_to) == 0:
 309            op_list, cost = Comparison._pitches_levenshtein_diff(
 310                original[1:], compare_to, noteNode1, noteNode2, (ids[0] + 1, ids[1])
 311            )
 312            op_list.append(
 313                ("delpitch", noteNode1, noteNode2, M21Utils.pitch_size(original[0]), ids)
 314            )
 315            cost += M21Utils.pitch_size(original[0])
 316            return op_list, cost
 317
 318        # compute the cost and the op_list for the many possibilities of recursion
 319        cost_dict = {}
 320        op_list_dict = {}
 321        # del-pitch
 322        op_list_dict["delpitch"], cost_dict["delpitch"] = Comparison._pitches_levenshtein_diff(
 323            original[1:], compare_to, noteNode1, noteNode2, (ids[0] + 1, ids[1])
 324        )
 325        cost_dict["delpitch"] += M21Utils.pitch_size(original[0])
 326        op_list_dict["delpitch"].append(
 327            ("delpitch", noteNode1, noteNode2, M21Utils.pitch_size(original[0]), ids)
 328        )
 329        # ins-pitch
 330        op_list_dict["inspitch"], cost_dict["inspitch"] = Comparison._pitches_levenshtein_diff(
 331            original, compare_to[1:], noteNode1, noteNode2, (ids[0], ids[1] + 1)
 332        )
 333        cost_dict["inspitch"] += M21Utils.pitch_size(compare_to[0])
 334        op_list_dict["inspitch"].append(
 335            ("inspitch", noteNode1, noteNode2, M21Utils.pitch_size(compare_to[0]), ids)
 336        )
 337        # edit-pitch
 338        op_list_dict["editpitch"], cost_dict["editpitch"] = Comparison._pitches_levenshtein_diff(
 339            original[1:], compare_to[1:], noteNode1, noteNode2, (ids[0] + 1, ids[1] + 1)
 340        )
 341        if original[0] == compare_to[0]:  # to avoid perform the pitch_diff
 342            pitch_diff_op_list = []
 343            pitch_diff_cost = 0
 344        else:
 345            pitch_diff_op_list, pitch_diff_cost = Comparison._pitches_diff(
 346                original[0], compare_to[0], noteNode1, noteNode2, (ids[0], ids[1])
 347            )
 348        cost_dict["editpitch"] += pitch_diff_cost
 349        op_list_dict["editpitch"].extend(pitch_diff_op_list)
 350        # compute the minimum of the possibilities
 351        min_key = min(cost_dict, key=lambda k: cost_dict[k])
 352        out = op_list_dict[min_key], cost_dict[min_key]
 353        return out
 354
 355    @staticmethod
 356    def _pitches_diff(pitch1, pitch2, noteNode1, noteNode2, ids):
 357        """
 358        Compute the differences between two pitch (definition from the paper).
 359        a pitch consist of a tuple: pitch name (letter+number), accidental, tie.
 360        param : pitch1. The music_notation_repr tuple of note1
 361        param : pitch2. The music_notation_repr tuple of note2
 362        param : noteNode1. The noteNode where pitch1 belongs
 363        param : noteNode2. The noteNode where pitch2 belongs
 364        param : ids. (id_from_note1,id_from_note2) The indices of the notes in case of a chord
 365        Returns:
 366            [list] -- the list of differences
 367            [int] -- the cost of diff
 368        """
 369        cost = 0
 370        op_list = []
 371        # add for pitch name differences
 372        if pitch1[0] != pitch2[0]:
 373            cost += 1
 374            # TODO: select the note in a more precise way in case of a chord
 375            # rest to note
 376            if (pitch1[0][0] == "R") != (pitch2[0][0] == "R"):  # xor
 377                op_list.append(("pitchtypeedit", noteNode1, noteNode2, 1, ids))
 378            else:  # they are two notes
 379                op_list.append(("pitchnameedit", noteNode1, noteNode2, 1, ids))
 380
 381        # add for the accidentals
 382        if pitch1[1] != pitch2[1]:  # if the accidental is different
 383            if pitch1[1] == "None":
 384                assert pitch2[1] != "None"
 385                cost += 1
 386                op_list.append(("accidentins", noteNode1, noteNode2, 1, ids))
 387            elif pitch2[1] == "None":
 388                assert pitch1[1] != "None"
 389                cost += 1
 390                op_list.append(("accidentdel", noteNode1, noteNode2, 1, ids))
 391            else:  # a different type of alteration is present
 392                cost += 2  # delete then add
 393                op_list.append(("accidentedit", noteNode1, noteNode2, 2, ids))
 394        # add for the ties
 395        if pitch1[2] != pitch2[2]:
 396            # exclusive or. Add if one is tied and not the other.
 397            # probably to revise for chords
 398            cost += 1
 399            if pitch1[2]:
 400                assert not pitch2[2]
 401                op_list.append(("tiedel", noteNode1, noteNode2, 1, ids))
 402            elif pitch2[2]:
 403                assert not pitch1[2]
 404                op_list.append(("tieins", noteNode1, noteNode2, 1, ids))
 405        return op_list, cost
 406
 407    @staticmethod
 408    @_memoize_block_diff_lin
 409    def _block_diff_lin(original, compare_to):
 410        if len(original) == 0 and len(compare_to) == 0:
 411            return [], 0
 412
 413        if len(original) == 0:
 414            op_list, cost = Comparison._block_diff_lin(original, compare_to[1:])
 415            cost += compare_to[0].notation_size()
 416            op_list.append(("insbar", None, compare_to[0], compare_to[0].notation_size()))
 417            return op_list, cost
 418
 419        if len(compare_to) == 0:
 420            op_list, cost = Comparison._block_diff_lin(original[1:], compare_to)
 421            cost += original[0].notation_size()
 422            op_list.append(("delbar", original[0], None, original[0].notation_size()))
 423            return op_list, cost
 424
 425        # compute the cost and the op_list for the many possibilities of recursion
 426        cost_dict = {}
 427        op_list_dict = {}
 428        # del-bar
 429        op_list_dict["delbar"], cost_dict["delbar"] = Comparison._block_diff_lin(
 430            original[1:], compare_to
 431        )
 432        cost_dict["delbar"] += original[0].notation_size()
 433        op_list_dict["delbar"].append(
 434            ("delbar", original[0], None, original[0].notation_size())
 435        )
 436        # ins-bar
 437        op_list_dict["insbar"], cost_dict["insbar"] = Comparison._block_diff_lin(
 438            original, compare_to[1:]
 439        )
 440        cost_dict["insbar"] += compare_to[0].notation_size()
 441        op_list_dict["insbar"].append(
 442            ("insbar", None, compare_to[0], compare_to[0].notation_size())
 443        )
 444        # edit-bar
 445        op_list_dict["editbar"], cost_dict["editbar"] = Comparison._block_diff_lin(
 446            original[1:], compare_to[1:]
 447        )
 448        if (
 449            original[0] == compare_to[0]
 450        ):  # to avoid performing the _voices_coupling_recursive/_notes_set_distance
 451            # if it's not needed
 452            inside_bar_op_list = []
 453            inside_bar_cost = 0
 454        else:
 455            # diff the bar extras (like _notes_set_distance, but with lists of AnnExtras
 456            # instead of lists of AnnNotes)
 457            extras_op_list, extras_cost = Comparison._extras_set_distance(
 458                original[0].extras_list, compare_to[0].extras_list
 459            )
 460
 461            # diff the bar lyrics (with lists of AnnLyrics instead of lists of AnnExtras)
 462            lyrics_op_list, lyrics_cost = Comparison._lyrics_diff_lin(
 463                original[0].lyrics_list, compare_to[0].lyrics_list
 464            )
 465
 466            if original[0].includes_voicing:
 467                # run the voice coupling algorithm, and add to inside_bar_op_list
 468                # and inside_bar_cost
 469                inside_bar_op_list, inside_bar_cost = (
 470                    Comparison._voices_coupling_recursive(
 471                        original[0].voices_list, compare_to[0].voices_list
 472                    )
 473                )
 474            else:
 475                # run the set distance algorithm, and add to inside_bar_op_list
 476                # and inside_bar_cost
 477                inside_bar_op_list, inside_bar_cost = Comparison._notes_set_distance(
 478                    original[0].annot_notes, compare_to[0].annot_notes
 479                )
 480
 481            inside_bar_op_list.extend(extras_op_list)
 482            inside_bar_cost += extras_cost
 483            inside_bar_op_list.extend(lyrics_op_list)
 484            inside_bar_cost += lyrics_cost
 485
 486        cost_dict["editbar"] += inside_bar_cost
 487        op_list_dict["editbar"].extend(inside_bar_op_list)
 488        # compute the minimum of the possibilities
 489        min_key = min(cost_dict, key=lambda k: cost_dict[k])
 490        out = op_list_dict[min_key], cost_dict[min_key]
 491        return out
 492
 493    @staticmethod
 494    @_memoize_lyrics_diff_lin
 495    def _lyrics_diff_lin(original, compare_to):
 496        # original and compare to are two lists of AnnLyric
 497        if len(original) == 0 and len(compare_to) == 0:
 498            return [], 0
 499
 500        if len(original) == 0:
 501            cost = 0
 502            op_list, cost = Comparison._lyrics_diff_lin(original, compare_to[1:])
 503            op_list.append(("lyricins", None, compare_to[0], compare_to[0].notation_size()))
 504            cost += compare_to[0].notation_size()
 505            return op_list, cost
 506
 507        if len(compare_to) == 0:
 508            cost = 0
 509            op_list, cost = Comparison._lyrics_diff_lin(original[1:], compare_to)
 510            op_list.append(("lyricdel", original[0], None, original[0].notation_size()))
 511            cost += original[0].notation_size()
 512            return op_list, cost
 513
 514        # compute the cost and the op_list for the many possibilities of recursion
 515        cost = {}
 516        op_list = {}
 517        # lyricdel
 518        op_list["lyricdel"], cost["lyricdel"] = Comparison._lyrics_diff_lin(
 519            original[1:], compare_to
 520        )
 521        cost["lyricdel"] += original[0].notation_size()
 522        op_list["lyricdel"].append(
 523            ("lyricdel", original[0], None, original[0].notation_size())
 524        )
 525        # lyricins
 526        op_list["lyricins"], cost["lyricins"] = Comparison._lyrics_diff_lin(
 527            original, compare_to[1:]
 528        )
 529        cost["lyricins"] += compare_to[0].notation_size()
 530        op_list["lyricins"].append(
 531            ("lyricins", None, compare_to[0], compare_to[0].notation_size())
 532        )
 533        # lyricsub
 534        op_list["lyricsub"], cost["lyricsub"] = Comparison._lyrics_diff_lin(
 535            original[1:], compare_to[1:]
 536        )
 537        if (
 538            original[0] == compare_to[0]
 539        ):  # avoid call another function if they are equal
 540            lyricsub_op, lyricsub_cost = [], 0
 541        else:
 542            lyricsub_op, lyricsub_cost = (
 543                Comparison._annotated_lyric_diff(original[0], compare_to[0])
 544            )
 545        cost["lyricsub"] += lyricsub_cost
 546        op_list["lyricsub"].extend(lyricsub_op)
 547        # compute the minimum of the possibilities
 548        min_key = min(cost, key=cost.get)
 549        out = op_list[min_key], cost[min_key]
 550        return out
 551
 552    @staticmethod
 553    def _strings_levenshtein_distance(str1: str, str2: str):
 554        counter: dict = {"+": 0, "-": 0}
 555        distance: int = 0
 556        for edit_code in ndiff(str1, str2):
 557            if edit_code[0] == " ":
 558                distance += max(counter.values())
 559                counter = {"+": 0, "-": 0}
 560            else:
 561                counter[edit_code[0]] += 1
 562        distance += max(counter.values())
 563        return distance
 564
 565    @staticmethod
 566    def _areDifferentEnough(off1: OffsetQL | None, off2: OffsetQL | None) -> bool:
 567        if off1 == off2:
 568            return False
 569
 570        # this should never happen, but...
 571        if off1 is None or off2 is None:
 572            if off1 is None and off2 is not None:
 573                return True
 574            if off1 is not None and off2 is None:
 575                return True
 576            return False  # both are None, therefore not different at all
 577
 578        diff: OffsetQL = off1 - off2
 579        if diff < 0:
 580            diff = -diff
 581
 582        if diff > 0.0001:
 583            return True
 584        return False
 585
 586    @staticmethod
 587    def _annotated_extra_diff(annExtra1: AnnExtra, annExtra2: AnnExtra):
 588        """
 589        Compute the differences between two annotated extras.
 590        Each annotated extra consists of three values: content, offset, and duration
 591        """
 592        cost = 0
 593        op_list = []
 594
 595        # add for the content
 596        if annExtra1.content != annExtra2.content:
 597            content_cost: int = (
 598                Comparison._strings_levenshtein_distance(
 599                    annExtra1.content or '',
 600                    annExtra2.content or ''
 601                )
 602            )
 603            cost += content_cost
 604            op_list.append(("extracontentedit", annExtra1, annExtra2, content_cost))
 605
 606        # add for the symbolic (cost 2: delete one symbol, add the other)
 607        if annExtra1.symbolic != annExtra2.symbolic:
 608            cost += 2
 609            op_list.append(("extrasymboledit", annExtra1, annExtra2, 2))
 610
 611        # add for the infodict
 612        if annExtra1.infodict != annExtra2.infodict:
 613            info_cost: int = 0
 614            # handle everything in annExtra1 (whether or not it is in annExtra2)
 615            for k, v in annExtra1.infodict.items():
 616                if k not in annExtra2.infodict:
 617                    # not in annExtra2: delete a symbol
 618                    info_cost += 1
 619                elif v != annExtra2.infodict[k]:
 620                    # different in annExtra2: delete a symbol, add a symbol
 621                    info_cost += 2
 622            # handle everything in annExtra2 that is not in annExtra1
 623            for k in annExtra2.infodict:
 624                if k not in annExtra1.infodict:
 625                    # add a symbol
 626                    info_cost += 1
 627            cost += info_cost
 628            op_list.append(("extrainfoedit", annExtra1, annExtra2, info_cost))
 629
 630        # add for the offset
 631        # Note: offset here is a float, and some file formats have only four
 632        # decimal places of precision.  So we should not compare exactly here.
 633        if Comparison._areDifferentEnough(annExtra1.offset, annExtra2.offset):
 634            cost += 1
 635            op_list.append(("extraoffsetedit", annExtra1, annExtra2, 1))
 636
 637        # add for the duration
 638        # Note: duration here is a float, and some file formats have only four
 639        # decimal places of precision.  So we should not compare exactly here.
 640        if Comparison._areDifferentEnough(annExtra1.duration, annExtra2.duration):
 641            cost += 1
 642            op_list.append(("extradurationedit", annExtra1, annExtra2, 1))
 643
 644        # add for the style
 645        if annExtra1.styledict != annExtra2.styledict:
 646            cost += 1  # someday we might count different items in the styledict
 647            op_list.append(("extrastyleedit", annExtra1, annExtra2, 1))
 648
 649        return op_list, cost
 650
 651    @staticmethod
 652    def _annotated_lyric_diff(annLyric1: AnnLyric, annLyric2: AnnLyric):
 653        """
 654        Compute the differences between two annotated lyrics.
 655        Each annotated lyric consists of five values: lyric, verse_id, offset, duration,
 656        and styledict.
 657        """
 658        cost = 0
 659        op_list = []
 660
 661        # add for the content
 662        if annLyric1.lyric != annLyric2.lyric:
 663            content_cost: int = (
 664                Comparison._strings_levenshtein_distance(annLyric1.lyric, annLyric2.lyric)
 665            )
 666            cost += content_cost
 667            op_list.append(("lyricedit", annLyric1, annLyric2, content_cost))
 668
 669        # add for the number
 670        if annLyric1.number != annLyric2.number:
 671            number_cost: int
 672            if annLyric1.number == 0 or annLyric2.number == 0:
 673                # add or delete number
 674                number_cost = 1
 675            else:
 676                # add and delete number
 677                number_cost = 2
 678            cost += number_cost
 679            op_list.append(("lyricnumedit", annLyric1, annLyric2, number_cost))
 680
 681        # add for the identifier
 682        if annLyric1.identifier != annLyric2.identifier:
 683            # someday we might do a Levenshtein distance of the two ids
 684            id_cost: int
 685            if not annLyric1.identifier or not annLyric1.identifier:
 686                # add or delete identifier
 687                id_cost = 1
 688            else:
 689                # add and delete identifier
 690                id_cost = 2
 691            cost += id_cost
 692            op_list.append(("lyricidedit", annLyric1, annLyric2, id_cost))
 693
 694        # add for the offset
 695        # Note: offset here is a float, and some file formats have only four
 696        # decimal places of precision.  So we should not compare exactly here.
 697        if Comparison._areDifferentEnough(annLyric1.offset, annLyric2.offset):
 698            cost += 1
 699            op_list.append(("lyricoffsetedit", annLyric1, annLyric2, 1))
 700
 701        # add for the style
 702        if annLyric1.styledict != annLyric2.styledict:
 703            cost += 1  # someday we might count different items in the styledict
 704            op_list.append(("lyricstyleedit", annLyric1, annLyric2, 1))
 705
 706        return op_list, cost
 707
 708    @staticmethod
 709    def _annotated_metadata_item_diff(
 710        annMetadataItem1: AnnMetadataItem,
 711        annMetadataItem2: AnnMetadataItem
 712    ):
 713        """
 714        Compute the differences between two annotated metadata items.
 715        Each annotated metadata item has two values: key: str, value: t.Any,
 716        """
 717        cost = 0
 718        op_list = []
 719
 720        # add for the key (this never adds anything because we don't compare
 721        # items that don't have the same key).
 722        if annMetadataItem1.key != annMetadataItem2.key:
 723            key_cost: int = (
 724                Comparison._strings_levenshtein_distance(
 725                    annMetadataItem1.key,
 726                    annMetadataItem2.key
 727                )
 728            )
 729            cost += key_cost
 730            op_list.append(("mditemkeyedit", annMetadataItem1, annMetadataItem2, key_cost))
 731
 732        # add for the value
 733        if annMetadataItem1.value != annMetadataItem2.value:
 734            value_cost: int = (
 735                Comparison._strings_levenshtein_distance(
 736                    str(annMetadataItem1.value),
 737                    str(annMetadataItem2.value)
 738                )
 739            )
 740            cost += value_cost
 741            op_list.append(
 742                ("mditemvalueedit", annMetadataItem1, annMetadataItem2, value_cost)
 743            )
 744
 745        return op_list, cost
 746
 747    @staticmethod
 748    def _annotated_staff_group_diff(annStaffGroup1: AnnStaffGroup, annStaffGroup2: AnnStaffGroup):
 749        """
 750        Compute the differences between two annotated staff groups.
 751        Each annotated staff group consists of five values: name, abbreviation,
 752        symbol, barTogether, part_indices.
 753        """
 754        cost = 0
 755        op_list = []
 756
 757        # add for the name
 758        if annStaffGroup1.name != annStaffGroup2.name:
 759            name_cost: int = (
 760                Comparison._strings_levenshtein_distance(
 761                    annStaffGroup1.name,
 762                    annStaffGroup2.name
 763                )
 764            )
 765            cost += name_cost
 766            op_list.append(("staffgrpnameedit", annStaffGroup1, annStaffGroup2, name_cost))
 767
 768        # add for the abbreviation
 769        if annStaffGroup1.abbreviation != annStaffGroup2.abbreviation:
 770            abbreviation_cost: int = (
 771                Comparison._strings_levenshtein_distance(
 772                    annStaffGroup1.abbreviation,
 773                    annStaffGroup2.abbreviation
 774                )
 775            )
 776            cost += abbreviation_cost
 777            op_list.append((
 778                "staffgrpabbreviationedit",
 779                annStaffGroup1,
 780                annStaffGroup2,
 781                abbreviation_cost
 782            ))
 783
 784        # add for the symbol
 785        if annStaffGroup1.symbol != annStaffGroup2.symbol:
 786            symbol_cost: int
 787            if not annStaffGroup1.symbol or not annStaffGroup2.symbol:
 788                # add or delete symbol
 789                symbol_cost = 1
 790            else:
 791                # add and delete symbol
 792                symbol_cost = 2
 793            cost += symbol_cost
 794            op_list.append(
 795                ("staffgrpsymboledit", annStaffGroup1, annStaffGroup2, symbol_cost)
 796            )
 797
 798        # add for barTogether
 799        if annStaffGroup1.barTogether != annStaffGroup2.barTogether:
 800            barTogether_cost: int = 1
 801            cost += barTogether_cost
 802            op_list.append(
 803                ("staffgrpbartogetheredit", annStaffGroup1, annStaffGroup2, barTogether_cost)
 804            )
 805
 806        # add for partIndices (sorted list of int)
 807        if annStaffGroup1.part_indices != annStaffGroup2.part_indices:
 808            partIndices_cost: int = 0
 809            if annStaffGroup1.part_indices[0] != annStaffGroup2.part_indices[0]:
 810                partIndices_cost += 1  # vertical start
 811            if annStaffGroup1.part_indices[-1] != annStaffGroup2.part_indices[-1]:
 812                partIndices_cost += 1  # vertical height
 813            if partIndices_cost == 0:
 814                # should never get here, but we have to have a cost
 815                partIndices_cost = 1
 816            cost += partIndices_cost
 817            op_list.append(
 818                ("staffgrppartindicesedit", annStaffGroup1, annStaffGroup2, partIndices_cost)
 819            )
 820
 821        return op_list, cost
 822
 823    @staticmethod
 824    @_memoize_inside_bars_diff_lin
 825    def _inside_bars_diff_lin(original, compare_to):
 826        # original and compare to are two lists of annotatedNote
 827        if len(original) == 0 and len(compare_to) == 0:
 828            return [], 0
 829
 830        if len(original) == 0:
 831            cost = 0
 832            op_list, cost = Comparison._inside_bars_diff_lin(original, compare_to[1:])
 833            op_list.append(("noteins", None, compare_to[0], compare_to[0].notation_size()))
 834            cost += compare_to[0].notation_size()
 835            return op_list, cost
 836
 837        if len(compare_to) == 0:
 838            cost = 0
 839            op_list, cost = Comparison._inside_bars_diff_lin(original[1:], compare_to)
 840            op_list.append(("notedel", original[0], None, original[0].notation_size()))
 841            cost += original[0].notation_size()
 842            return op_list, cost
 843
 844        # compute the cost and the op_list for the many possibilities of recursion
 845        cost = {}
 846        op_list = {}
 847        # notedel
 848        op_list["notedel"], cost["notedel"] = Comparison._inside_bars_diff_lin(
 849            original[1:], compare_to
 850        )
 851        cost["notedel"] += original[0].notation_size()
 852        op_list["notedel"].append(
 853            ("notedel", original[0], None, original[0].notation_size())
 854        )
 855        # noteins
 856        op_list["noteins"], cost["noteins"] = Comparison._inside_bars_diff_lin(
 857            original, compare_to[1:]
 858        )
 859        cost["noteins"] += compare_to[0].notation_size()
 860        op_list["noteins"].append(
 861            ("noteins", None, compare_to[0], compare_to[0].notation_size())
 862        )
 863        # notesub
 864        op_list["notesub"], cost["notesub"] = Comparison._inside_bars_diff_lin(
 865            original[1:], compare_to[1:]
 866        )
 867        if (
 868            original[0] == compare_to[0]
 869        ):  # avoid call another function if they are equal
 870            notesub_op, notesub_cost = [], 0
 871        else:
 872            notesub_op, notesub_cost = Comparison._annotated_note_diff(original[0], compare_to[0])
 873        cost["notesub"] += notesub_cost
 874        op_list["notesub"].extend(notesub_op)
 875        # compute the minimum of the possibilities
 876        min_key = min(cost, key=cost.get)
 877        out = op_list[min_key], cost[min_key]
 878        return out
 879
 880    @staticmethod
 881    def _annotated_note_diff(annNote1: AnnNote, annNote2: AnnNote):
 882        """
 883        Compute the differences between two annotated notes.
 884        Each annotated note consist in a 5tuple (pitches, notehead, dots, beamings, tuplets)
 885        where pitches is a list.
 886        Arguments:
 887            noteNode1 {[AnnNote]} -- original AnnNote
 888            noteNode2 {[AnnNote]} -- compare_to AnnNote
 889        """
 890        cost = 0
 891        op_list = []
 892        # add for the pitches
 893        # if they are equal
 894        if annNote1.pitches == annNote2.pitches:
 895            op_list_pitch, cost_pitch = [], 0
 896        else:
 897            # pitches diff is computed using Levenshtein distances (they are already ordered)
 898            op_list_pitch, cost_pitch = Comparison._pitches_levenshtein_diff(
 899                annNote1.pitches, annNote2.pitches, annNote1, annNote2, (0, 0)
 900            )
 901        op_list.extend(op_list_pitch)
 902        cost += cost_pitch
 903        # add for the notehead
 904        if annNote1.note_head != annNote2.note_head:
 905            # delete one note head, add the other (this isn't noteshape, this is
 906            # just quarter-note note head vs half-note note head, etc)
 907            cost += 2
 908            op_list.append(("headedit", annNote1, annNote2, 2))
 909        # add for the dots
 910        if annNote1.dots != annNote2.dots:
 911            # add one for each added (or deleted) dot
 912            dots_diff = abs(annNote1.dots - annNote2.dots)
 913            cost += dots_diff
 914            if annNote1.dots > annNote2.dots:
 915                op_list.append(("dotdel", annNote1, annNote2, dots_diff))
 916            else:
 917                op_list.append(("dotins", annNote1, annNote2, dots_diff))
 918        if annNote1.graceType != annNote2.graceType:
 919            # accented vs unaccented vs not a grace note (delete the wrong, add the right)
 920            cost += 2
 921            op_list.append(("graceedit", annNote1, annNote2, 2))
 922        if annNote1.graceSlash != annNote2.graceSlash:
 923            # add or delete the slash
 924            cost += 1
 925            op_list.append(("graceslashedit", annNote1, annNote2, 1))
 926        # add for the beamings
 927        if annNote1.beamings != annNote2.beamings:
 928            beam_op_list, beam_cost = Comparison._beamtuplet_levenshtein_diff(
 929                annNote1.beamings, annNote2.beamings, annNote1, annNote2, "beam"
 930            )
 931            op_list.extend(beam_op_list)
 932            cost += beam_cost
 933        # add for the tuplet types
 934        if annNote1.tuplets != annNote2.tuplets:
 935            tuplet_op_list, tuplet_cost = Comparison._beamtuplet_levenshtein_diff(
 936                annNote1.tuplets, annNote2.tuplets, annNote1, annNote2, "tuplet"
 937            )
 938            op_list.extend(tuplet_op_list)
 939            cost += tuplet_cost
 940        # add for the tuplet info
 941        if annNote1.tuplet_info != annNote2.tuplet_info:
 942            tuplet_info_op_list, tuplet_info_cost = Comparison._beamtuplet_levenshtein_diff(
 943                annNote1.tuplet_info, annNote2.tuplet_info, annNote1, annNote2, "tuplet"
 944            )
 945            op_list.extend(tuplet_info_op_list)
 946            cost += tuplet_info_cost
 947        # add for the articulations
 948        if annNote1.articulations != annNote2.articulations:
 949            artic_op_list, artic_cost = Comparison._generic_levenshtein_diff(
 950                annNote1.articulations,
 951                annNote2.articulations,
 952                annNote1,
 953                annNote2,
 954                "articulation",
 955            )
 956            op_list.extend(artic_op_list)
 957            cost += artic_cost
 958        # add for the expressions
 959        if annNote1.expressions != annNote2.expressions:
 960            expr_op_list, expr_cost = Comparison._generic_levenshtein_diff(
 961                annNote1.expressions,
 962                annNote2.expressions,
 963                annNote1,
 964                annNote2,
 965                "expression",
 966            )
 967            op_list.extend(expr_op_list)
 968            cost += expr_cost
 969
 970        # add for gap from previous note or start of measure if first note in measure
 971        # (i.e. horizontal position shift)
 972        if annNote1.gap_dur != annNote2.gap_dur:
 973            # in all cases, the edit is a simple horizontal shift of the note
 974            cost += 1
 975            if annNote1.gap_dur == 0:
 976                op_list.append(("insspace", annNote1, annNote2, 1))
 977            elif annNote2.gap_dur == 0:
 978                op_list.append(("delspace", annNote1, annNote2, 1))
 979            else:
 980                # neither is zero
 981                op_list.append(("editspace", annNote1, annNote2, 1))
 982
 983        # add for noteshape
 984        if annNote1.noteshape != annNote2.noteshape:
 985            # always delete existing note shape and add the new one
 986            cost += 2
 987            op_list.append(("editnoteshape", annNote1, annNote2, 2))
 988        # add for noteheadFill
 989        if annNote1.noteheadFill != annNote2.noteheadFill:
 990            # always delete existing note fill and add the new one
 991            cost += 2
 992            op_list.append(("editnoteheadfill", annNote1, annNote2, 2))
 993        # add for noteheadParenthesis (True or False)
 994        if annNote1.noteheadParenthesis != annNote2.noteheadParenthesis:
 995            # always either add or delete parentheses
 996            cost += 1
 997            op_list.append(("editnoteheadparenthesis", annNote1, annNote2, 1))
 998        # add for stemDirection
 999        if annNote1.stemDirection != annNote2.stemDirection:
1000            stemdir_cost: int
1001            if annNote1.stemDirection == 'noStem' or annNote2.stemDirection == 'noStem':
1002                # gonna add a stem
1003                stemdir_cost = 1
1004            else:
1005                # gonna change a stem (add then delete)
1006                stemdir_cost = 2
1007            cost += stemdir_cost
1008            op_list.append(("editstemdirection", annNote1, annNote2, stemdir_cost))
1009        # add for the styledict
1010        if annNote1.styledict != annNote2.styledict:
1011            cost += 1
1012            op_list.append(("editstyle", annNote1, annNote2, 1))
1013
1014        return op_list, cost
1015
1016    @staticmethod
1017    @_memoize_beamtuplet_lev_diff
1018    def _beamtuplet_levenshtein_diff(original, compare_to, note1, note2, which):
1019        """
1020        Compute the levenshtein distance between two sequences of beaming or tuples.
1021        Arguments:
1022            original {list} -- list of strings (start, stop, continue or partial)
1023            compare_to {list} -- list of strings (start, stop, continue or partial)
1024            note1 {AnnNote} -- the note for referencing in the score
1025            note2 {AnnNote} -- the note for referencing in the score
1026            which -- a string: "beam" or "tuplet" depending what we are comparing
1027        """
1028        if which not in ("beam", "tuplet"):
1029            raise ValueError("Argument 'which' must be either 'beam' or 'tuplet'")
1030
1031        if len(original) == 0 and len(compare_to) == 0:
1032            return [], 0
1033
1034        if len(original) == 0:
1035            op_list, cost = Comparison._beamtuplet_levenshtein_diff(
1036                original, compare_to[1:], note1, note2, which
1037            )
1038            op_list.append(("ins" + which, note1, note2, 1))
1039            cost += 1
1040            return op_list, cost
1041
1042        if len(compare_to) == 0:
1043            op_list, cost = Comparison._beamtuplet_levenshtein_diff(
1044                original[1:], compare_to, note1, note2, which
1045            )
1046            op_list.append(("del" + which, note1, note2, 1))
1047            cost += 1
1048            return op_list, cost
1049
1050        # compute the cost and the op_list for the many possibilities of recursion
1051        cost = {}
1052        op_list = {}
1053        # delwhich
1054        op_list["del" + which], cost["del" + which] = Comparison._beamtuplet_levenshtein_diff(
1055            original[1:], compare_to, note1, note2, which
1056        )
1057        cost["del" + which] += 1
1058        op_list["del" + which].append(("del" + which, note1, note2, 1))
1059        # inswhich
1060        op_list["ins" + which], cost["ins" + which] = Comparison._beamtuplet_levenshtein_diff(
1061            original, compare_to[1:], note1, note2, which
1062        )
1063        cost["ins" + which] += 1
1064        op_list["ins" + which].append(("ins" + which, note1, note2, 1))
1065        # editwhich
1066        op_list["edit" + which], cost["edit" + which] = Comparison._beamtuplet_levenshtein_diff(
1067            original[1:], compare_to[1:], note1, note2, which
1068        )
1069        if original[0] == compare_to[0]:
1070            beam_diff_op_list = []
1071            beam_diff_cost = 0
1072        else:
1073            beam_diff_op_list, beam_diff_cost = [("edit" + which, note1, note2, 1)], 1
1074        cost["edit" + which] += beam_diff_cost
1075        op_list["edit" + which].extend(beam_diff_op_list)
1076        # compute the minimum of the possibilities
1077        min_key = min(cost, key=cost.get)
1078        out = op_list[min_key], cost[min_key]
1079        return out
1080
1081    @staticmethod
1082    @_memoize_generic_lev_diff
1083    def _generic_levenshtein_diff(original, compare_to, note1, note2, which):
1084        """
1085        Compute the Levenshtein distance between two generic sequences of symbols
1086        (e.g., articulations).
1087        Arguments:
1088            original {list} -- list of strings
1089            compare_to {list} -- list of strings
1090            note1 {AnnNote} -- the note for referencing in the score
1091            note2 {AnnNote} -- the note for referencing in the score
1092            which -- a string: e.g. "articulation" depending what we are comparing
1093        """
1094        if len(original) == 0 and len(compare_to) == 0:
1095            return [], 0
1096
1097        if len(original) == 0:
1098            op_list, cost = Comparison._generic_levenshtein_diff(
1099                original, compare_to[1:], note1, note2, which
1100            )
1101            op_list.append(("ins" + which, note1, note2, 1))
1102            cost += 1
1103            return op_list, cost
1104
1105        if len(compare_to) == 0:
1106            op_list, cost = Comparison._generic_levenshtein_diff(
1107                original[1:], compare_to, note1, note2, which
1108            )
1109            op_list.append(("del" + which, note1, note2, 1))
1110            cost += 1
1111            return op_list, cost
1112
1113        # compute the cost and the op_list for the many possibilities of recursion
1114        cost = {}
1115        op_list = {}
1116        # delwhich
1117        op_list["del" + which], cost["del" + which] = Comparison._generic_levenshtein_diff(
1118            original[1:], compare_to, note1, note2, which
1119        )
1120        cost["del" + which] += 1
1121        op_list["del" + which].append(("del" + which, note1, note2, 1))
1122        # inswhich
1123        op_list["ins" + which], cost["ins" + which] = Comparison._generic_levenshtein_diff(
1124            original, compare_to[1:], note1, note2, which
1125        )
1126        cost["ins" + which] += 1
1127        op_list["ins" + which].append(("ins" + which, note1, note2, 1))
1128        # editwhich
1129        op_list["edit" + which], cost["edit" + which] = Comparison._generic_levenshtein_diff(
1130            original[1:], compare_to[1:], note1, note2, which
1131        )
1132        if original[0] == compare_to[0]:  # to avoid perform the diff
1133            generic_diff_op_list = []
1134            generic_diff_cost = 0
1135        else:
1136            generic_diff_op_list, generic_diff_cost = (
1137                [("edit" + which, note1, note2, 1)],
1138                1,
1139            )
1140        cost["edit" + which] += generic_diff_cost
1141        op_list["edit" + which].extend(generic_diff_op_list)
1142        # compute the minimum of the possibilities
1143        min_key = min(cost, key=cost.get)
1144        out = op_list[min_key], cost[min_key]
1145        return out
1146
1147    @staticmethod
1148    @_memoize_notes_set_distance
1149    def _notes_set_distance(original: list[AnnNote], compare_to: list[AnnNote]):
1150        """
1151        Gather up pairs of matching notes (using pitch, offset, graceness, and visual duration, in
1152        that order of importance).  If you can't find an exactly matching note, try again without
1153        visual duration.
1154        original [list] -- a list of AnnNote (which are never chords)
1155        compare_to [list] -- a list of AnnNote (which are never chords)
1156        """
1157        paired_notes: list[tuple[AnnNote, AnnNote]] = []
1158        unpaired_orig_notes: list[AnnNote] = []
1159        unpaired_comp_notes: list[AnnNote] = copy.copy(compare_to)
1160
1161        for orig_n in original:
1162            fallback: AnnNote | None = None
1163            fallback_i: int = -1
1164            found_it: bool = False
1165            for i, comp_n in enumerate(unpaired_comp_notes):
1166                if orig_n.pitches[0][0] != comp_n.pitches[0][0]:
1167                    # this pitch comparison (1) assumes the note is not a chord
1168                    # (because we don't do chords when Voicing is not set, and
1169                    # we only call _notes_set_distance when Voicing is not set),
1170                    # and (2) only compares the visual position of the note (we
1171                    # are ignoring the accidental here).  This is so that an
1172                    # accidental change will show up as a pitch edit, not a
1173                    # note remove/insert.
1174                    continue
1175                if Comparison._areDifferentEnough(orig_n.note_offset, comp_n.note_offset):
1176                    continue
1177                if orig_n.note_is_grace != comp_n.note_is_grace:
1178                    continue
1179                if fallback is None:
1180                    fallback = comp_n
1181                    fallback_i = i
1182
1183                if orig_n.note_dur_type != comp_n.note_dur_type:
1184                    continue
1185                if orig_n.note_dur_dots != comp_n.note_dur_dots:
1186                    continue
1187
1188                # found a perfect match
1189                paired_notes.append((orig_n, comp_n))
1190
1191                # remove comp_n from unpaired_comp_notes
1192                unpaired_comp_notes.pop(i)  # remove(comp_n) would sometimes get the wrong one
1193
1194                found_it = True
1195                break
1196
1197            if found_it:
1198                # on to the next original note
1199                continue
1200
1201            # did we find a fallback (matched except for duration)?
1202            if fallback is not None:
1203                paired_notes.append((orig_n, fallback))
1204                unpaired_comp_notes.pop(fallback_i)
1205                continue
1206
1207            # we found nothing
1208            unpaired_orig_notes.append(orig_n)
1209
1210        # compute the cost and the op_list
1211        cost: int = 0
1212        op_list: list = []
1213
1214        # notedel
1215        if unpaired_orig_notes:
1216            for an in unpaired_orig_notes:
1217                cost += an.notation_size()
1218                op_list.append(("notedel", an, None, an.notation_size(), an.note_idx_in_chord))
1219
1220        # noteins
1221        if unpaired_comp_notes:
1222            for an in unpaired_comp_notes:
1223                cost += an.notation_size()
1224                op_list.append(("noteins", None, an, an.notation_size(), an.note_idx_in_chord))
1225
1226        # notesub
1227        if paired_notes:
1228            for ano, anc in paired_notes:
1229                if ano == anc:
1230                    # if equal, avoid _annotated_note_diff call
1231                    notesub_op, notesub_cost = [], 0
1232                else:
1233                    notesub_op, notesub_cost = (
1234                        Comparison._annotated_note_diff(ano, anc)
1235                    )
1236                cost += notesub_cost
1237                op_list.extend(notesub_op)
1238
1239        return op_list, cost
1240
1241    @staticmethod
1242    @_memoize_extras_set_distance
1243    def _extras_set_distance(original: list[AnnExtra], compare_to: list[AnnExtra]):
1244        """
1245        Gather up pairs of matching extras (using kind, offset, and visual duration, in
1246        that order of importance).  If you can't find an exactly matching extra, try again
1247        without visual duration.
1248        original [list] -- a list of AnnExtras
1249        compare_to [list] -- a list of AnnExtras
1250        """
1251        paired_extras: list[tuple[AnnExtra, AnnExtra]] = []
1252        unpaired_orig_extras: list[AnnExtra] = []
1253        unpaired_comp_extras: list[AnnExtra] = copy.copy(compare_to)
1254
1255        for orig_x in original:
1256            fallback: AnnExtra | None = None
1257            fallback_i: int = -1
1258            found_it: bool = False
1259            for i, comp_x in enumerate(unpaired_comp_extras):
1260                # kind and offset are required for pairing
1261                if orig_x.kind != comp_x.kind:
1262                    continue
1263                if Comparison._areDifferentEnough(orig_x.offset, comp_x.offset):
1264                    continue
1265                if fallback is None:
1266                    fallback = comp_x
1267                    fallback_i = i
1268
1269                # duration is preferred for pairing
1270                if Comparison._areDifferentEnough(orig_x.duration, comp_x.duration):
1271                    continue
1272
1273                # there are a few kind-specific elements that are also preferred:
1274                #   'direction'/'ending': content (visible text)
1275                #   'keysig'/'timesig'/'clef': symbolic (there are sometimes two
1276                #       simultaneous keysigs, timesigs, or clefs, and we don't
1277                #       want to confuse which one is which, producing a diff where
1278                #       there actually isn't one)
1279                #   'slur': placements (because there are often two identical slurs
1280                #       whose only difference is 'above' vs 'below', producing a diff
1281                #       where there actually isn't one)
1282                if orig_x.kind in ('direction', 'ending'):
1283                    if orig_x.content != comp_x.content:
1284                        continue
1285                if orig_x.kind == 'clef':
1286                    if orig_x.symbolic != comp_x.symbolic:
1287                        continue
1288                if orig_x.kind in ('keysig', 'timesig'):
1289                    if orig_x.infodict != comp_x.infodict:
1290                        continue
1291                if orig_x.kind == 'slur':
1292                    orig_placement: str = orig_x.styledict.get('placement', '')
1293                    comp_placement: str = comp_x.styledict.get('placement', '')
1294                    if orig_placement != comp_placement:
1295                        continue
1296
1297                # found a perfect match
1298                paired_extras.append((orig_x, comp_x))
1299
1300                # remove comp_n from unpaired_comp_extras
1301                unpaired_comp_extras.pop(i)  # remove(comp_n) would sometimes get the wrong one
1302
1303                found_it = True
1304                break
1305
1306            if found_it:
1307                # on to the next original extra
1308                continue
1309
1310            # did we find a fallback (matched except for duration)?
1311            if fallback is not None:
1312                paired_extras.append((orig_x, fallback))
1313                unpaired_comp_extras.pop(fallback_i)
1314                continue
1315
1316            # we found nothing
1317            unpaired_orig_extras.append(orig_x)
1318
1319        # compute the cost and the op_list
1320        cost: int = 0
1321        op_list: list = []
1322
1323        # extradel
1324        for extra in unpaired_orig_extras:
1325            cost += extra.notation_size()
1326            op_list.append(("extradel", extra, None, extra.notation_size()))
1327
1328        # extrains
1329        for extra in unpaired_comp_extras:
1330            cost += extra.notation_size()
1331            op_list.append(("extrains", None, extra, extra.notation_size()))
1332
1333        # extrasub
1334        if paired_extras:
1335            for extrao, extrac in paired_extras:
1336                if extrao == extrac:
1337                    # if equal, avoid _annotated_extra_diff call
1338                    extrasub_op, extrasub_cost = [], 0
1339                else:
1340                    extrasub_op, extrasub_cost = (
1341                        Comparison._annotated_extra_diff(extrao, extrac)
1342                    )
1343                cost += extrasub_cost
1344                op_list.extend(extrasub_op)
1345
1346        return op_list, cost
1347
1348    @staticmethod
1349    @_memoize_metadata_items_set_distance
1350    def _metadata_items_set_distance(
1351        original: list[AnnMetadataItem],
1352        compare_to: list[AnnMetadataItem]
1353    ):
1354        """
1355        Gather up pairs of matching metadata_items (using key and value).  If you can't find
1356        an exactly matching metadata_item, try again without value.
1357        original [list] -- a list of AnnMetadataItems
1358        compare_to [list] -- a list of AnnMetadataItems
1359        """
1360        paired_metadata_items: list[tuple[AnnMetadataItem, AnnMetadataItem]] = []
1361        unpaired_orig_metadata_items: list[AnnMetadataItem] = []
1362        unpaired_comp_metadata_items: list[AnnMetadataItem] = copy.copy(compare_to)
1363
1364        # first look for perfect matches
1365        for orig_mdi in original:
1366            found_it: bool = False
1367            for i, comp_mdi in enumerate(unpaired_comp_metadata_items):
1368                # key is required for perfect match
1369                if orig_mdi.key != comp_mdi.key:
1370                    continue
1371
1372                # value is required for perfect match
1373                if orig_mdi.value != comp_mdi.value:
1374                    continue
1375
1376                # found a perfect match
1377                paired_metadata_items.append((orig_mdi, comp_mdi))
1378
1379                # remove comp_mdi from unpaired_comp_metadata_items
1380                unpaired_comp_metadata_items.pop(i)
1381
1382                found_it = True
1383                break
1384
1385            if found_it:
1386                # on to the next original metadata_item
1387                continue
1388
1389            # we found no perfect match
1390            unpaired_orig_metadata_items.append(orig_mdi)
1391
1392        # now look among the unpaired remainders for key match only
1393        remove_unpaired_orig_items: list[AnnMetadataItem] = []
1394        for orig_mdi in unpaired_orig_metadata_items:
1395            for comp_idx, comp_mdi in enumerate(unpaired_comp_metadata_items):
1396                # key is required for key-only match
1397                if orig_mdi.key != comp_mdi.key:
1398                    continue
1399
1400                # found a key-only match
1401                paired_metadata_items.append((orig_mdi, comp_mdi))
1402
1403                # remove comp_mdi from unpaired_comp_metadata_items
1404                unpaired_comp_metadata_items.pop(comp_idx)
1405
1406                # make a note of unpaired_orig_metadata_item to remove later
1407                remove_unpaired_orig_items.append(orig_mdi)
1408                break
1409
1410        for remove_mdi in remove_unpaired_orig_items:
1411            unpaired_orig_metadata_items.remove(remove_mdi)
1412
1413        # compute the cost and the op_list
1414        cost: int = 0
1415        op_list: list = []
1416
1417        # mditemdel
1418        for metadata_item in unpaired_orig_metadata_items:
1419            cost += metadata_item.notation_size()
1420            op_list.append(("mditemdel", metadata_item, None, metadata_item.notation_size()))
1421
1422        # mditemins
1423        for metadata_item in unpaired_comp_metadata_items:
1424            cost += metadata_item.notation_size()
1425            op_list.append(("mditemins", None, metadata_item, metadata_item.notation_size()))
1426
1427        # mditemsub
1428        if paired_metadata_items:
1429            for metadata_itemo, metadata_itemc in paired_metadata_items:
1430                if metadata_itemo == metadata_itemc:
1431                    # if equal, avoid _annotated_metadata_item_diff call
1432                    metadata_itemsub_op, metadata_itemsub_cost = [], 0
1433                else:
1434                    metadata_itemsub_op, metadata_itemsub_cost = (
1435                        Comparison._annotated_metadata_item_diff(metadata_itemo, metadata_itemc)
1436                    )
1437                cost += metadata_itemsub_cost
1438                op_list.extend(metadata_itemsub_op)
1439
1440        return op_list, cost
1441
1442    @staticmethod
1443    @_memoize_staff_groups_set_distance
1444    def _staff_groups_set_distance(
1445        original: list[AnnStaffGroup],
1446        compare_to: list[AnnStaffGroup]
1447    ):
1448        """
1449        Gather up pairs of matching staffgroups (using start_index and end_index, in
1450        that order of importance).  If you can't find an exactly matching staffgroup,
1451        try again without end_index.
1452        original [list] -- a list of AnnStaffGroups
1453        compare_to [list] -- a list of AnnStaffGroups
1454        """
1455        paired_staff_groups: list[tuple[AnnStaffGroup, AnnStaffGroup]] = []
1456        unpaired_orig_staff_groups: list[AnnStaffGroup] = []
1457        unpaired_comp_staff_groups: list[AnnStaffGroup] = copy.copy(compare_to)
1458
1459        for orig_sg in original:
1460            fallback: AnnStaffGroup | None = None
1461            fallback_i: int = -1
1462            found_it: bool = False
1463            for i, comp_sg in enumerate(unpaired_comp_staff_groups):
1464                # start index is required for pairing
1465                if orig_sg.part_indices[0] != comp_sg.part_indices[0]:
1466                    continue
1467                if fallback is None:
1468                    fallback = comp_sg
1469                    fallback_i = i
1470
1471                # end index and symbol (brace, bracket, etc) is preferred for pairing
1472                if orig_sg.part_indices[-1] != comp_sg.part_indices[-1]:
1473                    continue
1474                if orig_sg.symbol != comp_sg.symbol:
1475                    continue
1476
1477                # found a perfect match
1478                paired_staff_groups.append((orig_sg, comp_sg))
1479
1480                # remove comp_n from unpaired_comp_staff_groups
1481                unpaired_comp_staff_groups.pop(i)
1482
1483                found_it = True
1484                break
1485
1486            if found_it:
1487                # on to the next original staff_group
1488                continue
1489
1490            # did we find a fallback (matched except for duration)?
1491            if fallback is not None:
1492                paired_staff_groups.append((orig_sg, fallback))
1493                unpaired_comp_staff_groups.pop(fallback_i)
1494                continue
1495
1496            # we found nothing
1497            unpaired_orig_staff_groups.append(orig_sg)
1498
1499        # compute the cost and the op_list
1500        cost: int = 0
1501        op_list: list = []
1502
1503        # staffgrpdel
1504        for staff_group in unpaired_orig_staff_groups:
1505            cost += staff_group.notation_size()
1506            op_list.append(("staffgrpdel", staff_group, None, staff_group.notation_size()))
1507
1508        # staffgrpins
1509        for staff_group in unpaired_comp_staff_groups:
1510            cost += staff_group.notation_size()
1511            op_list.append(("staffgrpins", None, staff_group, staff_group.notation_size()))
1512
1513        # staffgrpsub
1514        if paired_staff_groups:
1515            for staff_groupo, staff_groupc in paired_staff_groups:
1516                if staff_groupo == staff_groupc:
1517                    # if equal, avoid _annotated_staff_group_diff call
1518                    staff_groupsub_op, staff_groupsub_cost = [], 0
1519                else:
1520                    staff_groupsub_op, staff_groupsub_cost = (
1521                        Comparison._annotated_staff_group_diff(staff_groupo, staff_groupc)
1522                    )
1523                cost += staff_groupsub_cost
1524                op_list.extend(staff_groupsub_op)
1525
1526        return op_list, cost
1527
1528    @staticmethod
1529    def _voices_coupling_recursive(original: list[AnnVoice], compare_to: list[AnnVoice]):
1530        """
1531        Compare all the possible voices permutations, considering also deletion and
1532        insertion (equation on office lens).
1533        original [list] -- a list of Voice
1534        compare_to [list] -- a list of Voice
1535        """
1536        if len(original) == 0 and len(compare_to) == 0:  # stop the recursion
1537            return [], 0
1538
1539        if len(original) == 0:
1540            # insertion
1541            op_list, cost = Comparison._voices_coupling_recursive(original, compare_to[1:])
1542            # add for the inserted voice
1543            op_list.append(("voiceins", None, compare_to[0], compare_to[0].notation_size()))
1544            cost += compare_to[0].notation_size()
1545            return op_list, cost
1546
1547        if len(compare_to) == 0:
1548            # deletion
1549            op_list, cost = Comparison._voices_coupling_recursive(original[1:], compare_to)
1550            # add for the deleted voice
1551            op_list.append(("voicedel", original[0], None, original[0].notation_size()))
1552            cost += original[0].notation_size()
1553            return op_list, cost
1554
1555        cost = {}
1556        op_list = {}
1557        # deletion
1558        op_list["voicedel"], cost["voicedel"] = Comparison._voices_coupling_recursive(
1559            original[1:], compare_to
1560        )
1561        op_list["voicedel"].append(
1562            ("voicedel", original[0], None, original[0].notation_size())
1563        )
1564        cost["voicedel"] += original[0].notation_size()
1565        for i, c in enumerate(compare_to):
1566            # substitution
1567            (
1568                op_list["voicesub" + str(i)],
1569                cost["voicesub" + str(i)],
1570            ) = Comparison._voices_coupling_recursive(
1571                original[1:], compare_to[:i] + compare_to[i + 1:]
1572            )
1573            if (
1574                compare_to[0] != original[0]
1575            ):  # add the cost of the sub and the operations from inside_bar_diff
1576                op_list_inside_bar, cost_inside_bar = Comparison._inside_bars_diff_lin(
1577                    original[0].annot_notes, c.annot_notes
1578                )  # compute the distance from original[0] and compare_to[i]
1579                op_list["voicesub" + str(i)].extend(op_list_inside_bar)
1580                cost["voicesub" + str(i)] += cost_inside_bar
1581        # compute the minimum of the possibilities
1582        min_key = min(cost, key=cost.get)
1583        out = op_list[min_key], cost[min_key]
1584        return out
1585
1586    @staticmethod
1587    def annotated_scores_diff(score1: AnnScore, score2: AnnScore) -> tuple[list[tuple], int]:
1588        '''
1589        Compare two annotated scores, computing an operations list and the cost of applying those
1590        operations to the first score to generate the second score.
1591
1592        Args:
1593            score1 (`musicdiff.annotation.AnnScore`): The first annotated score to compare.
1594            score2 (`musicdiff.annotation.AnnScore`): The second annotated score to compare.
1595
1596        Returns:
1597            list[tuple], int: The operations list and the cost
1598        '''
1599        # Clear all memoizer caches, in case we are called again with different scores.
1600        # The cached results are no longer valid.
1601        Comparison._clear_memoizer_caches()
1602
1603        op_list_total: list[tuple] = []
1604        cost_total: int = 0
1605
1606        if score1.n_of_parts == score2.n_of_parts:
1607            n_of_parts = score1.n_of_parts
1608        else:
1609            # The two scores have differing number of parts.  For now, assume that
1610            # the parts are in the same order in both scores, and that the missing
1611            # parts are the ones that should have been at the end of the smaller
1612            # score. In future we could do something like we do with voices, where
1613            # we try all the combinations and compare the most-similar pairs of
1614            # parts, and the rest are considered to be the extra parts (deleted
1615            # from score1, or inserted into score2).
1616            n_of_parts = min(score1.n_of_parts, score2.n_of_parts)
1617            if score1.n_of_parts > score2.n_of_parts:
1618                # score1 has more parts that must be deleted
1619                for part_idx in range(score2.n_of_parts, score1.n_of_parts):
1620                    deleted_part = score1.part_list[part_idx]
1621                    op_list_total.append(
1622                        (
1623                            "delpart",
1624                            deleted_part,
1625                            None,
1626                            deleted_part.notation_size()
1627                        )
1628                    )
1629                    cost_total += deleted_part.notation_size()
1630            else:
1631                # score2 has more parts that must be inserted
1632                for part_idx in range(score1.n_of_parts, score2.n_of_parts):
1633                    inserted_part = score2.part_list[part_idx]
1634                    op_list_total.append(
1635                        (
1636                            "inspart",
1637                            None,
1638                            inserted_part,
1639                            inserted_part.notation_size()
1640                        )
1641                    )
1642                    cost_total += inserted_part.notation_size()
1643
1644        # iterate over parts that exist in both scores
1645        for p_number in range(n_of_parts):
1646            # compute non-common-subseq
1647            ncs = Comparison._non_common_subsequences_of_measures(
1648                score1.part_list[p_number].bar_list,
1649                score2.part_list[p_number].bar_list,
1650            )
1651            # compute blockdiff
1652            for subseq in ncs:
1653                op_list_block, cost_block = Comparison._block_diff_lin(
1654                    subseq["original"], subseq["compare_to"]
1655                )
1656                op_list_total.extend(op_list_block)
1657                cost_total += cost_block
1658
1659        # compare the staff groups
1660        groups_op_list, groups_cost = Comparison._staff_groups_set_distance(
1661            score1.staff_group_list, score2.staff_group_list
1662        )
1663        op_list_total.extend(groups_op_list)
1664        cost_total += groups_cost
1665
1666        # compare the metadata items
1667        mditems_op_list, mditems_cost = Comparison._metadata_items_set_distance(
1668            score1.metadata_items_list, score2.metadata_items_list
1669        )
1670        op_list_total.extend(mditems_op_list)
1671        cost_total += mditems_cost
1672
1673        # Add the cost of any syntax errors in score1 that were fixed during parsing.
1674        # Ignore enough syntax errors to keep OMR-NED <= 1.0, for consistency.
1675        total_syms: int = score1.notation_size() + score2.notation_size()
1676        cost_plus_errors: int = cost_total + score1.num_syntax_errors_fixed
1677        if cost_plus_errors > total_syms:
1678            adjustment: int = cost_plus_errors - total_syms
1679            score1.num_syntax_errors_fixed -= adjustment
1680
1681        cost_total += score1.num_syntax_errors_fixed
1682
1683        return op_list_total, cost_total

@staticmethod

def annotated_scores_diff( score1: musicdiff.annotation.AnnScore, score2: musicdiff.annotation.AnnScore) -> tuple[list[tuple], int]: View Source

1586    @staticmethod
1587    def annotated_scores_diff(score1: AnnScore, score2: AnnScore) -> tuple[list[tuple], int]:
1588        '''
1589        Compare two annotated scores, computing an operations list and the cost of applying those
1590        operations to the first score to generate the second score.
1591
1592        Args:
1593            score1 (`musicdiff.annotation.AnnScore`): The first annotated score to compare.
1594            score2 (`musicdiff.annotation.AnnScore`): The second annotated score to compare.
1595
1596        Returns:
1597            list[tuple], int: The operations list and the cost
1598        '''
1599        # Clear all memoizer caches, in case we are called again with different scores.
1600        # The cached results are no longer valid.
1601        Comparison._clear_memoizer_caches()
1602
1603        op_list_total: list[tuple] = []
1604        cost_total: int = 0
1605
1606        if score1.n_of_parts == score2.n_of_parts:
1607            n_of_parts = score1.n_of_parts
1608        else:
1609            # The two scores have differing number of parts.  For now, assume that
1610            # the parts are in the same order in both scores, and that the missing
1611            # parts are the ones that should have been at the end of the smaller
1612            # score. In future we could do something like we do with voices, where
1613            # we try all the combinations and compare the most-similar pairs of
1614            # parts, and the rest are considered to be the extra parts (deleted
1615            # from score1, or inserted into score2).
1616            n_of_parts = min(score1.n_of_parts, score2.n_of_parts)
1617            if score1.n_of_parts > score2.n_of_parts:
1618                # score1 has more parts that must be deleted
1619                for part_idx in range(score2.n_of_parts, score1.n_of_parts):
1620                    deleted_part = score1.part_list[part_idx]
1621                    op_list_total.append(
1622                        (
1623                            "delpart",
1624                            deleted_part,
1625                            None,
1626                            deleted_part.notation_size()
1627                        )
1628                    )
1629                    cost_total += deleted_part.notation_size()
1630            else:
1631                # score2 has more parts that must be inserted
1632                for part_idx in range(score1.n_of_parts, score2.n_of_parts):
1633                    inserted_part = score2.part_list[part_idx]
1634                    op_list_total.append(
1635                        (
1636                            "inspart",
1637                            None,
1638                            inserted_part,
1639                            inserted_part.notation_size()
1640                        )
1641                    )
1642                    cost_total += inserted_part.notation_size()
1643
1644        # iterate over parts that exist in both scores
1645        for p_number in range(n_of_parts):
1646            # compute non-common-subseq
1647            ncs = Comparison._non_common_subsequences_of_measures(
1648                score1.part_list[p_number].bar_list,
1649                score2.part_list[p_number].bar_list,
1650            )
1651            # compute blockdiff
1652            for subseq in ncs:
1653                op_list_block, cost_block = Comparison._block_diff_lin(
1654                    subseq["original"], subseq["compare_to"]
1655                )
1656                op_list_total.extend(op_list_block)
1657                cost_total += cost_block
1658
1659        # compare the staff groups
1660        groups_op_list, groups_cost = Comparison._staff_groups_set_distance(
1661            score1.staff_group_list, score2.staff_group_list
1662        )
1663        op_list_total.extend(groups_op_list)
1664        cost_total += groups_cost
1665
1666        # compare the metadata items
1667        mditems_op_list, mditems_cost = Comparison._metadata_items_set_distance(
1668            score1.metadata_items_list, score2.metadata_items_list
1669        )
1670        op_list_total.extend(mditems_op_list)
1671        cost_total += mditems_cost
1672
1673        # Add the cost of any syntax errors in score1 that were fixed during parsing.
1674        # Ignore enough syntax errors to keep OMR-NED <= 1.0, for consistency.
1675        total_syms: int = score1.notation_size() + score2.notation_size()
1676        cost_plus_errors: int = cost_total + score1.num_syntax_errors_fixed
1677        if cost_plus_errors > total_syms:
1678            adjustment: int = cost_plus_errors - total_syms
1679            score1.num_syntax_errors_fixed -= adjustment
1680
1681        cost_total += score1.num_syntax_errors_fixed
1682
1683        return op_list_total, cost_total

Compare two annotated scores, computing an operations list and the cost of applying those operations to the first score to generate the second score.

Arguments:

score1 (musicdiff.annotation.AnnScore): The first annotated score to compare.
score2 (musicdiff.annotation.AnnScore): The second annotated score to compare.

Returns:

list[tuple], int: The operations list and the cost