musicdiff

  1# ------------------------------------------------------------------------------
  2# Purpose:       musicdiff is a package for comparing music scores using music21.
  3#
  4# Authors:       Greg Chapman <gregc@mac.com>
  5#                musicdiff is derived from:
  6#                   https://github.com/fosfrancesco/music-score-diff.git
  7#                   by Francesco Foscarin <foscarin.francesco@gmail.com>
  8#
  9# Copyright:     (c) 2022-2025 Francesco Foscarin, Greg Chapman
 10# License:       MIT, see LICENSE
 11# ------------------------------------------------------------------------------
 12
 13__docformat__ = "google"
 14
 15import sys
 16import os
 17import json
 18import re
 19import typing as t
 20from pathlib import Path
 21
 22import music21 as m21
 23import converter21
 24
 25from musicdiff.detaillevel import DetailLevel
 26from musicdiff.m21utils import M21Utils
 27from musicdiff.annotation import AnnScore
 28from musicdiff.comparison import Comparison
 29from musicdiff.comparison import EvaluationMetrics
 30from musicdiff.visualization import Visualization
 31
 32def _getInputExtensionsList() -> list[str]:
 33    c = m21.converter.Converter()
 34    inList = c.subConvertersList('input')
 35    result = []
 36    for subc in inList:
 37        for inputExt in subc.registerInputExtensions:
 38            result.append('.' + inputExt)
 39    return result
 40
 41def _printSupportedInputFormats() -> None:
 42    c = m21.converter.Converter()
 43    inList = c.subConvertersList('input')
 44    print("Supported input formats are:", file=sys.stderr)
 45    for subc in inList:
 46        if subc.registerInputExtensions:
 47            print('\tformats   : ' + ', '.join(subc.registerFormats)
 48                    + '\textensions: ' + ', '.join(subc.registerInputExtensions), file=sys.stderr)
 49
def diff(
    score1: str | Path | m21.stream.Score | m21.stream.Opus,
    score2: str | Path | m21.stream.Score | m21.stream.Opus,
    out_path1: str | Path | None = None,
    out_path2: str | Path | None = None,
    force_parse: bool = True,
    visualize_diffs: bool = True,
    print_text_output: bool = False,
    print_omr_ned_output: bool = False,
    fix_first_file_syntax: bool = False,
    detail: DetailLevel | int = DetailLevel.Default
) -> int | None:
    '''
    Compare two musical scores and optionally save/display the differences as two marked-up
    rendered PDFs.

    Args:
        score1 (str, Path, music21.stream.Score, music21.stream.Opus): The first music score
            to compare. The score can be a file of any format readable by music21
            (e.g. MusicXML, MEI, Humdrum, MIDI, etc), or a music21 Score (or Opus) object.
        score2 (str, Path, music21.stream.Score, music21.stream.Opus): The second musical
            score to compare. The score can be a file of any format readable by music21
            (e.g. MusicXML, MEI, Humdrum, MIDI, etc), or a music21 Score (or Opus) object.
        out_path1 (str, Path): Where to save the first marked-up rendered score PDF.
            If out_path1 is None, both PDFs will be displayed in the default PDF viewer.
            (default is None)
        out_path2 (str, Path): Where to save the second marked-up rendered score PDF.
            If out_path2 is None, both PDFs will be displayed in the default PDF viewer.
            (default is None)
        force_parse (bool): Whether or not to force music21 to re-parse a file it has parsed
            previously.
            (default is True)
        visualize_diffs (bool): Whether or not to render diffs as marked up PDFs. If False,
            the only result of the call will be the return value (the number of differences).
            (default is True)
        print_text_output (bool): Whether or not to print diffs in diff-like text to stdout.
            (default is False)
        print_omr_ned_output (bool): Whether or not to print the OMR normalized edit distance
            (OMR-NED), which is computed as OMR edit distance divided by the total number of
            symbols in the two scores.
            (default is False)
        fix_first_file_syntax (bool): Whether to attempt to fix syntax errors in the first
            file (and add the number of such fixes to the returned OMR edit distance).
            (default is False)
        detail (DetailLevel | int): What level of detail to use during the diff.
            Can be DecoratedNotesAndRests, OtherObjects, AllObjects, Default (currently
            AllObjects), or any combination (with | or &~) of those or NotesAndRests,
            Beams, Tremolos, Ornaments, Articulations, Ties, Slurs, Signatures,
            Directions, Barlines, StaffDetails, ChordSymbols, Ottavas, Arpeggios, Lyrics,
            Style, Metadata, Voicing, or NoteStaffPosition.

    Returns:
        int | None: The total OMR Edit Distance, i.e. the number of individual symbols
            that must be added or deleted. (0 means that the scores were identical, and
            None means that one or more of the input files failed to parse.)
    '''
    # Use the Humdrum/MEI importers from converter21 in place of the ones in music21...
    # Comment out this line to go back to music21's built-in Humdrum/MEI importers.
    converter21.register()

    badArg1: bool = False
    badArg2: bool = False
    # Only set when the corresponding score argument arrives as a str or Path;
    # later passed to Visualization.get_text_output to label the two scores.
    score1Name: str | Path | None = None
    score2Name: str | Path | None = None

    # Convert input strings to Paths
    if isinstance(score1, str):
        score1Name = score1
        try:
            score1 = Path(score1)
        except Exception:  # pylint: disable=broad-exception-caught
            print(f'score1 ({score1}) is not a valid path.', file=sys.stderr)
            badArg1 = True

    if isinstance(score2, str):
        score2Name = score2
        try:
            score2 = Path(score2)
        except Exception:  # pylint: disable=broad-exception-caught
            print(f'score2 ({score2}) is not a valid path.', file=sys.stderr)
            badArg2 = True

    # Both arguments are checked before bailing out, so a single run reports
    # errors for both bad paths.
    if badArg1 or badArg2:
        return None

    if isinstance(score1, Path):
        if not score1Name:
            score1Name = score1
        fileName1: str = score1.name
        fileExt1: str = score1.suffix

        if fileExt1 not in _getInputExtensionsList():
            print(f'score1 file extension ({fileExt1}) not supported by music21.', file=sys.stderr)
            badArg1 = True

        if not badArg1:
            # pylint: disable=broad-except
            try:
                sc = m21.converter.parse(
                    score1,
                    forceSource=force_parse,
                    acceptSyntaxErrors=fix_first_file_syntax
                )
                if t.TYPE_CHECKING:
                    assert isinstance(sc, m21.stream.Score)
                score1 = sc

            except Exception as e:
                print(f'score1 ({fileName1}) could not be parsed by music21', file=sys.stderr)
                print(e, file=sys.stderr)
                badArg1 = True
            # pylint: enable=broad-except

    if isinstance(score2, Path):
        if not score2Name:
            score2Name = score2
        fileName2: str = score2.name
        fileExt2: str = score2.suffix

        if fileExt2 not in _getInputExtensionsList():
            print(f'score2 file extension ({fileExt2}) not supported by music21.', file=sys.stderr)
            badArg2 = True

        if not badArg2:
            # pylint: disable=broad-except
            try:
                # NOTE: acceptSyntaxErrors is deliberately not passed here;
                # syntax-error fixing (fix_first_file_syntax) applies only to
                # the first score.
                sc = m21.converter.parse(score2, forceSource=force_parse)
                if t.TYPE_CHECKING:
                    assert isinstance(sc, m21.stream.Score)
                score2 = sc
            except Exception as e:
                print(f'score2 ({fileName2}) could not be parsed by music21', file=sys.stderr)
                print(e, file=sys.stderr)
                badArg2 = True
            # pylint: enable=broad-except

    if badArg1 or badArg2:
        return None

    # At this point both arguments have been narrowed to parsed music21 streams.
    if t.TYPE_CHECKING:
        assert isinstance(score1, (m21.stream.Score, m21.stream.Opus))
        assert isinstance(score2, (m21.stream.Score, m21.stream.Opus))

    total_cost: int = 0

    # if both "scores" are actually Scores, the lists will be of length 1.
    # If one or both are Opuses, the lists will be sized to fit the larger
    # of the two, with the list for the shorter Opus (or maybe just a Score)
    # padded with empty Scores, so the lists have the same length.
    scoreList1: list[m21.stream.Score]
    scoreList2: list[m21.stream.Score]
    scoreList1, scoreList2 = _getScoreLists(score1, score2)

    for sc1, sc2 in zip(scoreList1, scoreList2):
        # scan each score, producing an annotated wrapper
        annotated_score1: AnnScore = AnnScore(sc1, detail)
        annotated_score2: AnnScore = AnnScore(sc2, detail)

        diff_list: list
        cost: int
        diff_list, cost = Comparison.annotated_scores_diff(annotated_score1, annotated_score2)

        # costs accumulate over all score pairs (more than one pair only when
        # an Opus was passed in)
        total_cost += cost

        if cost != 0:
            if visualize_diffs:
                # you can change these three colors as you like...
                # Visualization.INSERTED_COLOR = 'red'
                # Visualization.DELETED_COLOR = 'red'
                # Visualization.CHANGED_COLOR = 'red'

                # color changed/deleted/inserted notes, add descriptive text for each change, etc
                Visualization.mark_diffs(sc1, sc2, diff_list)

                # ask music21 to display the scores as PDFs.  Composer's name will be prepended with
                # 'score1 ' and 'score2 ', respectively, so you can see which is which.
                Visualization.show_diffs(sc1, sc2, out_path1, out_path2)

        if print_omr_ned_output:
            # one JSON blob of OMR-NED stats per score pair
            omr_ned_output: dict = Visualization.get_omr_ned_output(
                cost, annotated_score1, annotated_score2
            )
            jsonStr: str = json.dumps(omr_ned_output, indent=4)
            print(jsonStr)

        if print_text_output:
            text_output: str = Visualization.get_text_output(
                sc1, sc2, diff_list, score1Name=score1Name, score2Name=score2Name
            )
            if text_output:
                # NOTE(review): print_text_output is necessarily True here, so
                # this condition reduces to print_omr_ned_output.
                if print_omr_ned_output and print_text_output:
                    # put a blank line between them
                    print('')
                print(text_output)

    return total_cost
246
247
def _diff_omr_ned_metrics(
    predpath: str | Path,
    gtpath: str | Path,
    detail: DetailLevel | int
) -> EvaluationMetrics | None:
    # Compare one predicted score file against one ground truth score file.
    # Returns an EvaluationMetrics carrying (gt_path, pred_path, numsyms_gt,
    # numsyms_pred, omr_edit_distance, edit_distances_dict, omr_ned).
    # Returns None if pred or gt is not a music21-importable format, or if
    # the ground truth score ends up with no parts.
    # If import is possible (correct format), but actually fails (incorrect
    # content), the resulting score will be empty (and omr_ned will be 1.0).

    # Convert input strings to Paths
    if isinstance(predpath, str):
        predpath = Path(predpath)
    if isinstance(gtpath, str):
        gtpath = Path(gtpath)

    # compute the supported-extension list once (it requires scanning all of
    # music21's input sub-converters) and use it for both files
    supportedExts: list[str] = _getInputExtensionsList()

    if predpath.suffix not in supportedExts:
        print(
            f'predicted file extension ({predpath.suffix}) not supported by music21.',
            file=sys.stderr
        )
        return None

    try:
        # the predicted file may be malformed; let the parser fix what it can
        predscore = m21.converter.parse(
            predpath,
            forceSource=True,
            acceptSyntaxErrors=True
        )
        if isinstance(predscore, m21.stream.Opus):
            # for ML training we only compare the first score found in the file
            predscore = predscore.scores[0]
    except Exception:  # pylint: disable=broad-exception-caught
        # best effort: an unparseable prediction compares as an empty score
        predscore = m21.stream.Score()

    if gtpath.suffix not in supportedExts:
        print(
            f'ground truth file extension ({gtpath.suffix}) not supported by music21.',
            file=sys.stderr
        )
        return None

    try:
        # ground truth is expected to be well-formed; no syntax fixing here
        gtscore = m21.converter.parse(
            gtpath,
            forceSource=True,
            acceptSyntaxErrors=False
        )
        if isinstance(gtscore, m21.stream.Opus):
            # for ML training we only compare the first score found in the file
            gtscore = gtscore.scores[0]
    except Exception:  # pylint: disable=broad-exception-caught
        # best effort: an unparseable ground truth compares as an empty score
        gtscore = m21.stream.Score()

    if t.TYPE_CHECKING:
        assert isinstance(gtscore, m21.stream.Score)
        assert isinstance(predscore, m21.stream.Score)

    # an empty ground truth score is not comparable
    numParts: int = len(list(gtscore.parts))
    if numParts == 0:
        return None

    # scan each score, producing an annotated wrapper
    ann_predscore: AnnScore = AnnScore(predscore, detail)
    ann_gtscore: AnnScore = AnnScore(gtscore, detail)

    numsyms_gt: int = ann_gtscore.notation_size()
    numsyms_pred: int = ann_predscore.notation_size()
    op_list: list
    omr_edit_distance: int
    op_list, omr_edit_distance = Comparison.annotated_scores_diff(ann_predscore, ann_gtscore)
    edit_distances_dict: dict[str, int] = Visualization.get_edit_distances_dict(
        op_list,
        ann_predscore.num_syntax_errors_fixed,
        detail
    )
    omr_ned = Visualization.get_omr_ned(omr_edit_distance, numsyms_pred, numsyms_gt)
    metrics = EvaluationMetrics(
        gtpath, predpath, numsyms_gt, numsyms_pred, omr_edit_distance, edit_distances_dict, omr_ned
    )
    return metrics
332
333
def diff_ml_training(
    predicted_folder: str,
    ground_truth_folder: str,
    output_folder: str,
    detail: DetailLevel | int = DetailLevel.Default,
) -> tuple[float, str]:
    '''
    Compare two folders of musical scores, and produce a CSV spreadsheet of results, including
    the overall OMR-NED score for the batch.

    Args:
        predicted_folder (str): The folder full of predicted scores. The scores
            can be of any format readable by music21 (e.g. MusicXML, MEI, Humdrum, etc).
            Each score must have the exact same filename as the corresponding ground
            truth score.
        ground_truth_folder (str): The folder full of ground truth scores. Each score must
            have the exact same filename as the corresponding predicted score.
        output_folder (str): The folder in which to save the output spreadsheet (output.csv).
        detail (DetailLevel | int): What level of detail to use during the comparisons.
            Can be DecoratedNotesAndRests, OtherObjects, AllObjects, Default (currently
            AllObjects), or any combination (with | or &~) of those or NotesAndRests,
            Beams, Tremolos, Ornaments, Articulations, Ties, Slurs, Signatures,
            Directions, Barlines, StaffDetails, ChordSymbols, Ottavas, Arpeggios, Lyrics,
            Style, Metadata, Voicing, or NoteStaffPosition.

    Returns:
        tuple[float, str]: Overall OMR-NED score for the batch, and the full path to the
            output spreadsheet (output.csv).
    '''

    converter21.register()

    # Expand tildes BEFORE deriving any path from the folders.  (Previously
    # output_file_path was built from the unexpanded output_folder, so a '~'
    # in output_folder made the open() below fail.)
    predicted_folder = os.path.expanduser(predicted_folder)
    ground_truth_folder = os.path.expanduser(ground_truth_folder)
    output_folder = os.path.expanduser(output_folder)

    output_file_path: str = os.path.join(output_folder, 'output.csv')

    metrics_list: list[EvaluationMetrics] = []
    for name in os.listdir(predicted_folder):
        predpath: str = os.path.join(predicted_folder, name)

        # only compare regular files
        if not os.path.isfile(predpath):
            continue

        # skip predictions that have no same-named file in ground_truth_folder
        gtpath: str = os.path.join(ground_truth_folder, name)
        if not os.path.isfile(gtpath):
            continue

        metrics: EvaluationMetrics | None = _diff_omr_ned_metrics(
            predpath=predpath, gtpath=gtpath, detail=detail
        )
        if metrics is None:
            # unsupported format or empty ground truth; leave it out of the CSV
            continue

        metrics_list.append(metrics)

    # sort metrics_list the way you want it to appear in the csv file.
    # I like it sorted by omr_ned (ascending), so the omr_ned == 0.0 entries
    # are together at the top, and the omr_ned == 1.0 entries are together
    # at the bottom.  Within each group of "same omr_ned", sort by filename.
    def natsortkey(path: str) -> list[int | str]:
        # splits path into chunks of digits and non-digits. Converts the digit
        # chunks to integers for numerical comparison and the non-digit chunks
        # to lowercase for case-insensitive comparison.
        key: list[int | str] = []
        for chunk in re.split(r'(\d+)', path):
            if chunk.isdigit():
                key.append(int(chunk))
            else:
                key.append(chunk.lower())
        return key

    metrics_list.sort(key=lambda m: (m.omr_ned, natsortkey(str(m.gt_path))))

    # accumulate totals across the whole batch for the overall OMR-NED score
    # (the loop is simply a no-op when metrics_list is empty)
    total_gt_numsyms: int = 0
    total_pred_numsyms: int = 0
    total_omr_edit_distance: int = 0
    for metrics in metrics_list:
        total_gt_numsyms += metrics.gt_numsyms
        total_pred_numsyms += metrics.pred_numsyms
        total_omr_edit_distance += metrics.omr_edit_distance

    overall_score: float = Visualization.get_omr_ned(
        total_omr_edit_distance, total_pred_numsyms, total_gt_numsyms
    )

    with open(output_file_path, 'wt', encoding='utf-8') as outf:
        print(Visualization.get_output_csv_header(detail), file=outf)

        for metrics in metrics_list:
            # one CSV line per compared file pair
            # (gt path, pred path, gt numsyms, pred numsyms, sym edit cost, omr_ned)
            print(Visualization.get_output_csv_line(metrics, detail), file=outf)

        # append overall stats to the output file
        print(Visualization.get_output_csv_trailer(metrics_list, detail), file=outf)
        outf.flush()

    return overall_score, output_file_path
438
def _getScoreLists(
    score1: m21.stream.Score | m21.stream.Opus,
    score2: m21.stream.Score | m21.stream.Opus
) -> tuple[list[m21.stream.Score], list[m21.stream.Score]]:
    # Turn each argument into a list of Scores: a Score becomes a one-element
    # list, an Opus becomes the list of its Scores.  If the two lists end up
    # different lengths, the shorter one is padded in place with empty Scores
    # so both returned lists have the same length.
    def asScoreList(
        score: m21.stream.Score | m21.stream.Opus
    ) -> list[m21.stream.Score]:
        if isinstance(score, m21.stream.Score):
            return [score]
        return list(score.scores)

    list1: list[m21.stream.Score] = asScoreList(score1)
    list2: list[m21.stream.Score] = asScoreList(score2)

    shorter: list[m21.stream.Score] = list1 if len(list1) < len(list2) else list2
    longer: list[m21.stream.Score] = list2 if shorter is list1 else list1
    while len(shorter) < len(longer):
        shorter.append(m21.stream.Score())

    return list1, list2
def diff( score1: str | pathlib.Path | music21.stream.base.Score | music21.stream.base.Opus, score2: str | pathlib.Path | music21.stream.base.Score | music21.stream.base.Opus, out_path1: str | pathlib.Path | None = None, out_path2: str | pathlib.Path | None = None, force_parse: bool = True, visualize_diffs: bool = True, print_text_output: bool = False, print_omr_ned_output: bool = False, fix_first_file_syntax: bool = False, detail: musicdiff.detaillevel.DetailLevel | int = <DetailLevel.AllObjects: 32767>) -> int | None:
 51def diff(
 52    score1: str | Path | m21.stream.Score | m21.stream.Opus,
 53    score2: str | Path | m21.stream.Score | m21.stream.Opus,
 54    out_path1: str | Path | None = None,
 55    out_path2: str | Path | None = None,
 56    force_parse: bool = True,
 57    visualize_diffs: bool = True,
 58    print_text_output: bool = False,
 59    print_omr_ned_output: bool = False,
 60    fix_first_file_syntax: bool = False,
 61    detail: DetailLevel | int = DetailLevel.Default
 62) -> int | None:
 63    '''
 64    Compare two musical scores and optionally save/display the differences as two marked-up
 65    rendered PDFs.
 66
 67    Args:
 68        score1 (str, Path, music21.stream.Score): The first music score to compare. The score
 69            can be a file of any format readable by music21 (e.g. MusicXML, MEI, Humdrum, MIDI,
 70            etc), or a music21 Score object.
 71        score2 (str, Path, music21.stream.Score): The second musical score to compare. The score
 72            can be a file of any format readable by music21 (e.g. MusicXML, MEI, Humdrum, MIDI,
 73            etc), or a music21 Score object.
 74        out_path1 (str, Path): Where to save the first marked-up rendered score PDF.
 75            If out_path1 is None, both PDFs will be displayed in the default PDF viewer.
 76            (default is None)
 77        out_path2 (str, Path): Where to save the second marked-up rendered score PDF.
 78            If out_path2 is None, both PDFs will be displayed in the default PDF viewer.
 79            (default is None)
 80        force_parse (bool): Whether or not to force music21 to re-parse a file it has parsed
 81            previously.
 82            (default is True)
 83        visualize_diffs (bool): Whether or not to render diffs as marked up PDFs. If False,
 84            the only result of the call will be the return value (the number of differences).
 85            (default is True)
 86        print_text_output (bool): Whether or not to print diffs in diff-like text to stdout.
 87            (default is False)
 88        print_omr_ned_output (bool): Whether or not to print the OMR normalized edit distance
 89            (OMR-NED), which is computed as OMR edit distance divided by the total number of
 90            symbols in the two scores.
 91            (default is False)
 92        fix_first_file_syntax (bool): Whether to attempt to fix syntax errors in the first
 93            file (and add the number of such fixes to the returned OMR edit distance).
 94            (default is False)
 95        detail (DetailLevel | int): What level of detail to use during the diff.
 96            Can be DecoratedNotesAndRests, OtherObjects, AllObjects, Default (currently
 97            AllObjects), or any combination (with | or &~) of those or NotesAndRests,
 98            Beams, Tremolos, Ornaments, Articulations, Ties, Slurs, Signatures,
 99            Directions, Barlines, StaffDetails, ChordSymbols, Ottavas, Arpeggios, Lyrics,
100            Style, Metadata, Voicing, or NoteStaffPosition.
101
102    Returns:
103        int | None: The total OMR Edit Distance, i.e. the number of individual symbols
104            that must be added or deleted. (0 means that the scores were identical, and
105            None means that one or more of the input files failed to parse.)
106    '''
107    # Use the Humdrum/MEI importers from converter21 in place of the ones in music21...
108    # Comment out this line to go back to music21's built-in Humdrum/MEI importers.
109    converter21.register()
110
111    badArg1: bool = False
112    badArg2: bool = False
113    score1Name: str | Path | None = None
114    score2Name: str | Path | None = None
115
116    # Convert input strings to Paths
117    if isinstance(score1, str):
118        score1Name = score1
119        try:
120            score1 = Path(score1)
121        except Exception:  # pylint: disable=broad-exception-caught
122            print(f'score1 ({score1}) is not a valid path.', file=sys.stderr)
123            badArg1 = True
124
125    if isinstance(score2, str):
126        score2Name = score2
127        try:
128            score2 = Path(score2)
129        except Exception:  # pylint: disable=broad-exception-caught
130            print(f'score2 ({score2}) is not a valid path.', file=sys.stderr)
131            badArg2 = True
132
133    if badArg1 or badArg2:
134        return None
135
136    if isinstance(score1, Path):
137        if not score1Name:
138            score1Name = score1
139        fileName1 = score1.name
140        fileExt1 = score1.suffix
141
142        if fileExt1 not in _getInputExtensionsList():
143            print(f'score1 file extension ({fileExt1}) not supported by music21.', file=sys.stderr)
144            badArg1 = True
145
146        if not badArg1:
147            # pylint: disable=broad-except
148            try:
149                sc = m21.converter.parse(
150                    score1,
151                    forceSource=force_parse,
152                    acceptSyntaxErrors=fix_first_file_syntax
153                )
154                if t.TYPE_CHECKING:
155                    assert isinstance(sc, m21.stream.Score)
156                score1 = sc
157
158            except Exception as e:
159                print(f'score1 ({fileName1}) could not be parsed by music21', file=sys.stderr)
160                print(e, file=sys.stderr)
161                badArg1 = True
162            # pylint: enable=broad-except
163
164    if isinstance(score2, Path):
165        if not score2Name:
166            score2Name = score2
167        fileName2: str = score2.name
168        fileExt2: str = score2.suffix
169
170        if fileExt2 not in _getInputExtensionsList():
171            print(f'score2 file extension ({fileExt2}) not supported by music21.', file=sys.stderr)
172            badArg2 = True
173
174        if not badArg2:
175            # pylint: disable=broad-except
176            try:
177                sc = m21.converter.parse(score2, forceSource=force_parse)
178                if t.TYPE_CHECKING:
179                    assert isinstance(sc, m21.stream.Score)
180                score2 = sc
181            except Exception as e:
182                print(f'score2 ({fileName2}) could not be parsed by music21', file=sys.stderr)
183                print(e, file=sys.stderr)
184                badArg2 = True
185            # pylint: enable=broad-except
186
187    if badArg1 or badArg2:
188        return None
189
190    if t.TYPE_CHECKING:
191        assert isinstance(score1, (m21.stream.Score, m21.stream.Opus))
192        assert isinstance(score2, (m21.stream.Score, m21.stream.Opus))
193
194    total_cost: int = 0
195
196    # if both "scores" are actually Scores, the lists will be of length 1.
197    # If one or both are Opuses, the lists will be sized to fit the larger
198    # of the two, with the list for the shorter Opus (or maybe just a Score)
199    # padded with empty Scores, so the lists have the same length.
200    scoreList1: list[m21.stream.Score]
201    scoreList2: list[m21.stream.Score]
202    scoreList1, scoreList2 = _getScoreLists(score1, score2)
203
204    for sc1, sc2 in zip(scoreList1, scoreList2):
205        # scan each score, producing an annotated wrapper
206        annotated_score1: AnnScore = AnnScore(sc1, detail)
207        annotated_score2: AnnScore = AnnScore(sc2, detail)
208
209        diff_list: list
210        cost: int
211        diff_list, cost = Comparison.annotated_scores_diff(annotated_score1, annotated_score2)
212
213        total_cost += cost
214
215        if cost != 0:
216            if visualize_diffs:
217                # you can change these three colors as you like...
218                # Visualization.INSERTED_COLOR = 'red'
219                # Visualization.DELETED_COLOR = 'red'
220                # Visualization.CHANGED_COLOR = 'red'
221
222                # color changed/deleted/inserted notes, add descriptive text for each change, etc
223                Visualization.mark_diffs(sc1, sc2, diff_list)
224
225                # ask music21 to display the scores as PDFs.  Composer's name will be prepended with
226                # 'score1 ' and 'score2 ', respectively, so you can see which is which.
227                Visualization.show_diffs(sc1, sc2, out_path1, out_path2)
228
229        if print_omr_ned_output:
230            omr_ned_output: dict = Visualization.get_omr_ned_output(
231                cost, annotated_score1, annotated_score2
232            )
233            jsonStr: str = json.dumps(omr_ned_output, indent=4)
234            print(jsonStr)
235
236        if print_text_output:
237            text_output: str = Visualization.get_text_output(
238                sc1, sc2, diff_list, score1Name=score1Name, score2Name=score2Name
239            )
240            if text_output:
241                if print_omr_ned_output and print_text_output:
242                    # put a blank line between them
243                    print('')
244                print(text_output)
245
246    return total_cost

Compare two musical scores and optionally save/display the differences as two marked-up rendered PDFs.

Arguments:
  • score1 (str, Path, music21.stream.Score): The first music score to compare. The score can be a file of any format readable by music21 (e.g. MusicXML, MEI, Humdrum, MIDI, etc), or a music21 Score object.
  • score2 (str, Path, music21.stream.Score): The second musical score to compare. The score can be a file of any format readable by music21 (e.g. MusicXML, MEI, Humdrum, MIDI, etc), or a music21 Score object.
  • out_path1 (str, Path): Where to save the first marked-up rendered score PDF. If out_path1 is None, both PDFs will be displayed in the default PDF viewer. (default is None)
  • out_path2 (str, Path): Where to save the second marked-up rendered score PDF. If out_path2 is None, both PDFs will be displayed in the default PDF viewer. (default is None)
  • force_parse (bool): Whether or not to force music21 to re-parse a file it has parsed previously. (default is True)
  • visualize_diffs (bool): Whether or not to render diffs as marked up PDFs. If False, the only result of the call will be the return value (the number of differences). (default is True)
  • print_text_output (bool): Whether or not to print diffs in diff-like text to stdout. (default is False)
  • print_omr_ned_output (bool): Whether or not to print the OMR normalized edit distance (OMR-NED), which is computed as OMR edit distance divided by the total number of symbols in the two scores. (default is False)
  • fix_first_file_syntax (bool): Whether to attempt to fix syntax errors in the first file (and add the number of such fixes to the returned OMR edit distance). (default is False)
  • detail (DetailLevel | int): What level of detail to use during the diff. Can be DecoratedNotesAndRests, OtherObjects, AllObjects, Default (currently AllObjects), or any combination (with | or &~) of those or NotesAndRests, Beams, Tremolos, Ornaments, Articulations, Ties, Slurs, Signatures, Directions, Barlines, StaffDetails, ChordSymbols, Ottavas, Arpeggios, Lyrics, Style, Metadata, Voicing, or NoteStaffPosition.
Returns:

int | None: The total OMR Edit Distance, i.e. the number of individual symbols that must be added or deleted. (0 means that the scores were identical, and None means that one or more of the input files failed to parse.)

def diff_ml_training( predicted_folder: str, ground_truth_folder: str, output_folder: str, detail: musicdiff.detaillevel.DetailLevel | int = <DetailLevel.AllObjects: 32767>) -> tuple[float, str]:
def diff_ml_training(
    predicted_folder: str,
    ground_truth_folder: str,
    output_folder: str,
    detail: DetailLevel | int = DetailLevel.Default,
) -> tuple[float, str]:
    '''
    Compare two folders of musical scores, and produce a CSV spreadsheet of results, including
    the overall OMR-NED score for the batch.

    Args:
        predicted_folder (str): The folder full of predicted scores. The scores
            can be of any format readable by music21 (e.g. MusicXML, MEI, Humdrum, etc).
            Each score must have the exact same filename as the corresponding ground
            truth score.
        ground_truth_folder (str): The folder full of ground truth scores. Each score must
            have the exact same filename as the corresponding predicted score.
        output_folder (str): The folder in which to save the output spreadsheet (output.csv).
        detail (DetailLevel | int): What level of detail to use during the comparisons.
            Can be DecoratedNotesAndRests, OtherObjects, AllObjects, Default (currently
            AllObjects), or any combination (with | or &~) of those or NotesAndRests,
            Beams, Tremolos, Ornaments, Articulations, Ties, Slurs, Signatures,
            Directions, Barlines, StaffDetails, ChordSymbols, Ottavas, Arpeggios, Lyrics,
            Style, Metadata, Voicing, or NoteStaffPosition.

    Returns:
        tuple[float, str]: Overall OMR-NED score for the batch, and the full path to the
            output spreadsheet (output.csv).
    '''
    converter21.register()

    # Expand tildes BEFORE deriving output_file_path.  (Previously the path was
    # built from the un-expanded output_folder, so a '~'-prefixed folder made
    # open() fail and the returned path contained a literal '~'.)
    predicted_folder = os.path.expanduser(predicted_folder)
    ground_truth_folder = os.path.expanduser(ground_truth_folder)
    output_folder = os.path.expanduser(output_folder)

    output_file_path: str = os.path.join(output_folder, 'output.csv')

    # Gather one EvaluationMetrics per (predicted, ground-truth) filename pair.
    metrics_list: list[EvaluationMetrics] = []
    for name in os.listdir(predicted_folder):
        predpath: str = os.path.join(predicted_folder, name)

        # skip anything that isn't a regular file (e.g. subdirectories)
        if not os.path.isfile(predpath):
            continue

        # skip predicted files with no same-named ground truth file
        gtpath: str = os.path.join(ground_truth_folder, name)
        if not os.path.isfile(gtpath):
            continue

        metrics: EvaluationMetrics | None = _diff_omr_ned_metrics(
            predpath=predpath, gtpath=gtpath, detail=detail
        )
        if metrics is None:
            # one of the two scores failed to parse; leave it out of the batch
            continue

        metrics_list.append(metrics)

    # Sort metrics_list the way it should appear in the csv file:
    # by omr_ned ascending (so the omr_ned == 0.0 entries are together at the
    # top and the omr_ned == 1.0 entries are together at the bottom), and
    # within each group of equal omr_ned, by natural-sorted filename.
    def natsortkey(path: str) -> list[int | str]:
        # Split path into alternating digit/non-digit chunks; digit chunks
        # compare numerically, non-digit chunks compare case-insensitively.
        key: list[int | str] = []
        for chunk in re.split(r'(\d+)', path):
            if chunk.isdigit():
                key.append(int(chunk))
            else:
                key.append(chunk.lower())
        return key

    metrics_list.sort(key=lambda m: (m.omr_ned, natsortkey(str(m.gt_path))))

    # Compute the overall OMR-NED for the batch from the summed per-file
    # counts (an empty metrics_list leaves all totals at zero).
    total_gt_numsyms: int = 0
    total_pred_numsyms: int = 0
    total_omr_edit_distance: int = 0
    for metrics in metrics_list:
        total_gt_numsyms += metrics.gt_numsyms
        total_pred_numsyms += metrics.pred_numsyms
        total_omr_edit_distance += metrics.omr_edit_distance

    overall_score: float = Visualization.get_omr_ned(
        total_omr_edit_distance, total_pred_numsyms, total_gt_numsyms
    )

    with open(output_file_path, 'wt', encoding='utf-8') as outf:
        print(Visualization.get_output_csv_header(detail), file=outf)

        for metrics in metrics_list:
            # one CSV line per score pair:
            # (gt path, pred path, gt numsyms, pred numsyms, sym edit cost, omr_ned)
            print(Visualization.get_output_csv_line(metrics, detail), file=outf)

        # trailer line summarizes the whole batch (overall OMR-NED)
        print(Visualization.get_output_csv_trailer(metrics_list, detail), file=outf)
        outf.flush()

    return overall_score, output_file_path

Compare two folders of musical scores, and produce a CSV spreadsheet of results, including the overall OMR-NED score for the batch.

Arguments:
  • predicted_folder (str): The folder full of predicted scores. The scores can be of any format readable by music21 (e.g. MusicXML, MEI, Humdrum, etc). Each score must have the exact same filename as the corresponding ground truth score.
  • ground_truth_folder (str): The folder full of ground truth scores. Each score must have the exact same filename as the corresponding predicted score.
  • output_folder (str): The folder in which to save the output spreadsheet (output.csv).
  • detail (DetailLevel | int): What level of detail to use during the comparisons. Can be DecoratedNotesAndRests, OtherObjects, AllObjects, Default (currently AllObjects), or any combination (with | or &~) of those or NotesAndRests, Beams, Tremolos, Ornaments, Articulations, Ties, Slurs, Signatures, Directions, Barlines, StaffDetails, ChordSymbols, Ottavas, Arpeggios, Lyrics, Style, Metadata, Voicing, or NoteStaffPosition.
Returns:

tuple[float, str]: Overall OMR-NED score for the batch, and the full path to the output spreadsheet (output.csv).