musicdiff
# ------------------------------------------------------------------------------
# Purpose:    musicdiff is a package for comparing music scores using music21.
#
# Authors:    Greg Chapman <gregc@mac.com>
#             musicdiff is derived from:
#                https://github.com/fosfrancesco/music-score-diff.git
#                by Francesco Foscarin <foscarin.francesco@gmail.com>
#
# Copyright:  (c) 2022-2025 Francesco Foscarin, Greg Chapman
# License:    MIT, see LICENSE
# ------------------------------------------------------------------------------

__docformat__ = "google"

import sys
import os
import json
import re
import typing as t
from pathlib import Path

import music21 as m21
import converter21

from musicdiff.detaillevel import DetailLevel
from musicdiff.m21utils import M21Utils
from musicdiff.annotation import AnnScore
from musicdiff.comparison import Comparison
from musicdiff.comparison import EvaluationMetrics
from musicdiff.visualization import Visualization

def _getInputExtensionsList() -> list[str]:
    # Returns every file extension (with leading '.') that music21's registered
    # input subconverters can parse, e.g. ['.xml', '.mei', '.krn', ...].
    c = m21.converter.Converter()
    inList = c.subConvertersList('input')
    result = []
    for subc in inList:
        for inputExt in subc.registerInputExtensions:
            result.append('.' + inputExt)
    return result

def _printSupportedInputFormats() -> None:
    # Prints (to stderr) a human-readable list of every input format/extension
    # that music21's registered input subconverters support.
    c = m21.converter.Converter()
    inList = c.subConvertersList('input')
    print("Supported input formats are:", file=sys.stderr)
    for subc in inList:
        if subc.registerInputExtensions:
            print('\tformats : ' + ', '.join(subc.registerFormats)
                    + '\textensions: ' + ', '.join(subc.registerInputExtensions), file=sys.stderr)

def diff(
    score1: str | Path | m21.stream.Score,
    score2: str | Path | m21.stream.Score,
    out_path1: str | Path | None = None,
    out_path2: str | Path | None = None,
    force_parse: bool = True,
    visualize_diffs: bool = True,
    print_text_output: bool = False,
    print_omr_ned_output: bool = False,
    fix_first_file_syntax: bool = False,
    detail: DetailLevel | int = DetailLevel.Default
) -> int | None:
    '''
    Compare two musical scores and optionally save/display the differences as two marked-up
    rendered PDFs.

    Args:
        score1 (str, Path, music21.stream.Score): The first music score to compare. The score
            can be a file of any format readable by music21 (e.g. MusicXML, MEI, Humdrum, MIDI,
            etc), or a music21 Score object.
        score2 (str, Path, music21.stream.Score): The second musical score to compare. The score
            can be a file of any format readable by music21 (e.g. MusicXML, MEI, Humdrum, MIDI,
            etc), or a music21 Score object.
        out_path1 (str, Path): Where to save the first marked-up rendered score PDF.
            If out_path1 is None, both PDFs will be displayed in the default PDF viewer.
            (default is None)
        out_path2 (str, Path): Where to save the second marked-up rendered score PDF.
            If out_path2 is None, both PDFs will be displayed in the default PDF viewer.
            (default is None)
        force_parse (bool): Whether or not to force music21 to re-parse a file it has parsed
            previously.
            (default is True)
        visualize_diffs (bool): Whether or not to render diffs as marked up PDFs. If False,
            the only result of the call will be the return value (the number of differences).
            (default is True)
        print_text_output (bool): Whether or not to print diffs in diff-like text to stdout.
            (default is False)
        print_omr_ned_output (bool): Whether or not to print the OMR normalized edit distance
            (OMR-NED), which is computed as OMR edit distance divided by the total number of
            symbols in the two scores.
            (default is False)
        fix_first_file_syntax (bool): Whether to attempt to fix syntax errors in the first
            file (and add the number of such fixes to the returned OMR edit distance).
            (default is False)
        detail (DetailLevel | int): What level of detail to use during the diff.
            Can be DecoratedNotesAndRests, OtherObjects, AllObjects, Default (currently
            AllObjects), or any combination (with | or &~) of those or NotesAndRests,
            Beams, Tremolos, Ornaments, Articulations, Ties, Slurs, Signatures,
            Directions, Barlines, StaffDetails, ChordSymbols, Ottavas, Arpeggios, Lyrics,
            Style, Metadata, or Voicing.

    Returns:
        int | None: The total OMR Edit Distance, i.e. the number of individual symbols
            that must be added or deleted.  (0 means that the scores were identical, and
            None means that one or more of the input files failed to parse.)
    '''
    # Use the Humdrum/MEI importers from converter21 in place of the ones in music21...
    # Comment out this line to go back to music21's built-in Humdrum/MEI importers.
    converter21.register()

    badArg1: bool = False
    badArg2: bool = False
    score1Name: str | Path | None = None
    score2Name: str | Path | None = None

    # Convert input strings to Paths
    if isinstance(score1, str):
        score1Name = score1
        try:
            score1 = Path(score1)
        except Exception:  # pylint: disable=broad-exception-caught
            print(f'score1 ({score1}) is not a valid path.', file=sys.stderr)
            badArg1 = True

    if isinstance(score2, str):
        score2Name = score2
        try:
            score2 = Path(score2)
        except Exception:  # pylint: disable=broad-exception-caught
            print(f'score2 ({score2}) is not a valid path.', file=sys.stderr)
            badArg2 = True

    if badArg1 or badArg2:
        return None

    if isinstance(score1, Path):
        if not score1Name:
            score1Name = score1
        fileName1 = score1.name
        fileExt1 = score1.suffix

        if fileExt1 not in _getInputExtensionsList():
            print(f'score1 file extension ({fileExt1}) not supported by music21.', file=sys.stderr)
            badArg1 = True

        if not badArg1:
            # pylint: disable=broad-except
            try:
                # Only the first file gets acceptSyntaxErrors (see
                # fix_first_file_syntax in the docstring).
                sc = m21.converter.parse(
                    score1,
                    forceSource=force_parse,
                    acceptSyntaxErrors=fix_first_file_syntax
                )
                if t.TYPE_CHECKING:
                    assert isinstance(sc, m21.stream.Score)
                score1 = sc

            except Exception as e:
                print(f'score1 ({fileName1}) could not be parsed by music21', file=sys.stderr)
                print(e, file=sys.stderr)
                badArg1 = True
            # pylint: enable=broad-except

    if isinstance(score2, Path):
        if not score2Name:
            score2Name = score2
        fileName2: str = score2.name
        fileExt2: str = score2.suffix

        if fileExt2 not in _getInputExtensionsList():
            print(f'score2 file extension ({fileExt2}) not supported by music21.', file=sys.stderr)
            badArg2 = True

        if not badArg2:
            # pylint: disable=broad-except
            try:
                sc = m21.converter.parse(score2, forceSource=force_parse)
                if t.TYPE_CHECKING:
                    assert isinstance(sc, m21.stream.Score)
                score2 = sc
            except Exception as e:
                print(f'score2 ({fileName2}) could not be parsed by music21', file=sys.stderr)
                print(e, file=sys.stderr)
                badArg2 = True
            # pylint: enable=broad-except

    if badArg1 or badArg2:
        return None

    if t.TYPE_CHECKING:
        assert isinstance(score1, m21.stream.Score)
        assert isinstance(score2, m21.stream.Score)

    # scan each score, producing an annotated wrapper
    annotated_score1: AnnScore = AnnScore(score1, detail)
    annotated_score2: AnnScore = AnnScore(score2, detail)

    diff_list: list
    cost: int
    diff_list, cost = Comparison.annotated_scores_diff(annotated_score1, annotated_score2)

    if cost != 0:
        if visualize_diffs:
            # you can change these three colors as you like...
            # Visualization.INSERTED_COLOR = 'red'
            # Visualization.DELETED_COLOR = 'red'
            # Visualization.CHANGED_COLOR = 'red'

            # color changed/deleted/inserted notes, add descriptive text for each change, etc
            Visualization.mark_diffs(score1, score2, diff_list)

            # ask music21 to display the scores as PDFs. Composer's name will be prepended with
            # 'score1 ' and 'score2 ', respectively, so you can see which is which.
            Visualization.show_diffs(score1, score2, out_path1, out_path2)

        if print_omr_ned_output:
            omr_ned_output: dict = Visualization.get_omr_ned_output(
                cost, annotated_score1, annotated_score2
            )
            jsonStr: str = json.dumps(omr_ned_output, indent=4)
            print(jsonStr)

        if print_text_output:
            text_output: str = Visualization.get_text_output(
                score1, score2, diff_list, score1Name=score1Name, score2Name=score2Name
            )
            if text_output:
                if print_omr_ned_output and print_text_output:
                    # put a blank line between them
                    print('')
                print(text_output)

    return cost


def _diff_omr_ned_metrics(
    predpath: str | Path,
    gtpath: str | Path,
    detail: DetailLevel | int = DetailLevel.Default
) -> EvaluationMetrics | None:
    # Returns an EvaluationMetrics (gt path, pred path, numsyms_gt, numsyms_pred,
    # omr_edit_distance, edit_distances_dict, omr_ned).
    # Returns None if pred or gt is not a music21-importable format, or if the
    # ground truth score has no parts.
    # If import is possible (correct format), but actually fails (incorrect content),
    # the resulting score will be empty (and omr_ned will be 1.0).

    # Convert input strings to Paths
    if isinstance(predpath, str):
        predpath = Path(predpath)
    if isinstance(gtpath, str):
        gtpath = Path(gtpath)

    if predpath.suffix not in _getInputExtensionsList():
        print(
            f'predicted file extension ({predpath.suffix}) not supported by music21.',
            file=sys.stderr
        )
        return None

    try:
        # predicted output may be malformed; let the importer fix what it can
        predscore = m21.converter.parse(
            predpath,
            forceSource=True,
            acceptSyntaxErrors=True
        )
    except Exception:
        # unparseable prediction counts as an empty score (omr_ned -> 1.0)
        predscore = m21.stream.Score()

    if gtpath.suffix not in _getInputExtensionsList():
        print(
            f'ground truth file extension ({gtpath.suffix}) not supported by music21.',
            file=sys.stderr
        )
        return None

    try:
        # ground truth is expected to be syntactically valid; do not auto-fix
        gtscore = m21.converter.parse(
            gtpath,
            forceSource=True,
            acceptSyntaxErrors=False
        )
    except Exception:
        gtscore = m21.stream.Score()

    if t.TYPE_CHECKING:
        assert isinstance(gtscore, m21.stream.Score)
        assert isinstance(predscore, m21.stream.Score)

    numParts: int = len(list(gtscore.parts))
    if numParts == 0:
        # empty ground truth: nothing meaningful to compare against
        return None

    # scan each score, producing an annotated wrapper
    ann_predscore: AnnScore = AnnScore(predscore, detail)
    ann_gtscore: AnnScore = AnnScore(gtscore, detail)

    numsyms_gt: int = ann_gtscore.notation_size()
    numsyms_pred: int = ann_predscore.notation_size()
    op_list: list
    omr_edit_distance: int
    op_list, omr_edit_distance = Comparison.annotated_scores_diff(ann_predscore, ann_gtscore)
    edit_distances_dict: dict[str, int] = Visualization.get_edit_distances_dict(
        op_list,
        ann_predscore.num_syntax_errors_fixed,
        detail
    )
    omr_ned = Visualization.get_omr_ned(omr_edit_distance, numsyms_pred, numsyms_gt)
    metrics = EvaluationMetrics(
        gtpath, predpath, numsyms_gt, numsyms_pred, omr_edit_distance,
        edit_distances_dict, omr_ned
    )
    return metrics


def diff_ml_training(
    predicted_folder: str,
    ground_truth_folder: str,
    output_folder: str,
    detail: DetailLevel | int = DetailLevel.Default,
) -> tuple[float, str]:
    '''
    Compare two folders of musical scores, and produce a CSV spreadsheet of results, including
    the overall OMR-NED score for the batch.

    Args:
        predicted_folder (str): The folder full of predicted scores. The scores
            can be of any format readable by music21 (e.g. MusicXML, MEI, Humdrum, etc).
            Each score must have the exact same filename as the corresponding ground
            truth score.
        ground_truth_folder (str): The folder full of ground truth scores. Each score must
            have the exact same filename as the corresponding predicted score.
        output_folder (str): The folder in which to save the output spreadsheet (output.csv).
        detail (DetailLevel | int): What level of detail to use during the comparisons.
            Can be DecoratedNotesAndRests, OtherObjects, AllObjects, Default (currently
            AllObjects), or any combination (with | or &~) of those or NotesAndRests,
            Beams, Tremolos, Ornaments, Articulations, Ties, Slurs, Signatures,
            Directions, Barlines, StaffDetails, ChordSymbols, Ottavas, Arpeggios, Lyrics,
            Style, Metadata, or Voicing.

    Returns:
        tuple[float, str]: Overall OMR-NED score for the batch, and the full path to the
            output spreadsheet (output.csv).  (The score is 0.0 if no comparable file
            pairs were found.)
    '''

    converter21.register()

    # expand tildes (before building any paths from these folders)
    predicted_folder = os.path.expanduser(predicted_folder)
    ground_truth_folder = os.path.expanduser(ground_truth_folder)
    output_folder = os.path.expanduser(output_folder)

    # BUGFIX: compute output_file_path from the tilde-expanded output_folder
    # (previously it was computed before expanduser, so '~/...' output folders
    # failed at open(), and the returned path was unexpanded).
    output_file_path: str = os.path.join(output_folder, 'output.csv')

    metrics_list: list[EvaluationMetrics] = []
    for name in os.listdir(predicted_folder):
        predpath: str = os.path.join(predicted_folder, name)

        # check if it is a file
        if not os.path.isfile(predpath):
            continue

        # check if there is a same-named file in ground_truth_folder
        gtpath: str = os.path.join(ground_truth_folder, name)
        if not os.path.isfile(gtpath):
            continue

        metrics: EvaluationMetrics | None = _diff_omr_ned_metrics(
            predpath=predpath, gtpath=gtpath, detail=detail
        )
        if metrics is None:
            continue

        # append metrics to metrics_list
        metrics_list.append(metrics)

    # sort metrics_list the way you want it to appear in the csv file.
    # I like it sorted by omr_ned (ascending), so the omr_ned == 0.0 entries
    # are together at the top, and the omr_ned == 1.0 entries are together
    # at the bottom.  Within each group of "same omr_ned", sort by filename.
    def natsortkey(path: str):
        # splits path into chunks of digits and non-digits.  Converts the digit
        # chunks to integers for numerical comparison and the non-digit chunks
        # to lowercase for case-insensitive comparison.
        key: list[int | str] = []
        for chunk in re.split(r'(\d+)', path):
            if chunk.isdigit():
                key.append(int(chunk))
            else:
                key.append(chunk.lower())
        return key

    metrics_list.sort(key=lambda m: (m.omr_ned, natsortkey(str(m.gt_path))))

    # BUGFIX: initialize overall_score so an empty metrics_list no longer
    # raises UnboundLocalError at the return statement.
    overall_score: float = 0.0

    with open(output_file_path, 'wt', encoding='utf-8') as outf:
        print(Visualization.get_output_csv_header(detail), file=outf)

        for metrics in metrics_list:
            # append CSV line to output file
            # (gt path, pred path, gt numsyms, pred numsyms, sym edit cost, omr_ned)
            print(Visualization.get_output_csv_line(metrics, detail), file=outf)

        # append overall score to output file (currently average SER)
        total_gt_numsyms: int = 0
        total_pred_numsyms: int = 0
        total_omr_edit_distance: int = 0
        if metrics_list:
            for metrics in metrics_list:
                total_gt_numsyms += metrics.gt_numsyms
                total_pred_numsyms += metrics.pred_numsyms
                total_omr_edit_distance += metrics.omr_edit_distance

            overall_score = Visualization.get_omr_ned(
                total_omr_edit_distance, total_pred_numsyms, total_gt_numsyms
            )

        print(Visualization.get_output_csv_trailer(metrics_list, detail), file=outf)
        outf.flush()

    return overall_score, output_file_path
def
diff( score1: str | pathlib.Path | music21.stream.base.Score, score2: str | pathlib.Path | music21.stream.base.Score, out_path1: str | pathlib.Path | None = None, out_path2: str | pathlib.Path | None = None, force_parse: bool = True, visualize_diffs: bool = True, print_text_output: bool = False, print_omr_ned_output: bool = False, fix_first_file_syntax: bool = False, detail: musicdiff.detaillevel.DetailLevel | int = musicdiff.detaillevel.DetailLevel.AllObjects) -> int | None:
51def diff( 52 score1: str | Path | m21.stream.Score, 53 score2: str | Path | m21.stream.Score, 54 out_path1: str | Path | None = None, 55 out_path2: str | Path | None = None, 56 force_parse: bool = True, 57 visualize_diffs: bool = True, 58 print_text_output: bool = False, 59 print_omr_ned_output: bool = False, 60 fix_first_file_syntax: bool = False, 61 detail: DetailLevel | int = DetailLevel.Default 62) -> int | None: 63 ''' 64 Compare two musical scores and optionally save/display the differences as two marked-up 65 rendered PDFs. 66 67 Args: 68 score1 (str, Path, music21.stream.Score): The first music score to compare. The score 69 can be a file of any format readable by music21 (e.g. MusicXML, MEI, Humdrum, MIDI, 70 etc), or a music21 Score object. 71 score2 (str, Path, music21.stream.Score): The second musical score to compare. The score 72 can be a file of any format readable by music21 (e.g. MusicXML, MEI, Humdrum, MIDI, 73 etc), or a music21 Score object. 74 out_path1 (str, Path): Where to save the first marked-up rendered score PDF. 75 If out_path1 is None, both PDFs will be displayed in the default PDF viewer. 76 (default is None) 77 out_path2 (str, Path): Where to save the second marked-up rendered score PDF. 78 If out_path2 is None, both PDFs will be displayed in the default PDF viewer. 79 (default is None) 80 force_parse (bool): Whether or not to force music21 to re-parse a file it has parsed 81 previously. 82 (default is True) 83 visualize_diffs (bool): Whether or not to render diffs as marked up PDFs. If False, 84 the only result of the call will be the return value (the number of differences). 85 (default is True) 86 print_text_output (bool): Whether or not to print diffs in diff-like text to stdout. 87 (default is False) 88 print_omr_ned_output (bool): Whether or not to print the OMR normalized edit distance 89 (OMR-NED), which is computed as OMR edit distance divided by the total number of 90 symbols in the two scores. 
91 (default is False) 92 fix_first_file_syntax (bool): Whether to attempt to fix syntax errors in the first 93 file (and add the number of such fixes to the returned OMR edit distance). 94 (default is False) 95 detail (DetailLevel | int): What level of detail to use during the diff. 96 Can be DecoratedNotesAndRests, OtherObjects, AllObjects, Default (currently 97 AllObjects), or any combination (with | or &~) of those or NotesAndRests, 98 Beams, Tremolos, Ornaments, Articulations, Ties, Slurs, Signatures, 99 Directions, Barlines, StaffDetails, ChordSymbols, Ottavas, Arpeggios, Lyrics, 100 Style, Metadata, or Voicing. 101 102 Returns: 103 int | None: The total OMR Edit Distance, i.e. the number of individual symbols 104 that must be added or deleted. (0 means that the scores were identical, and 105 None means that one or more of the input files failed to parse.) 106 ''' 107 # Use the Humdrum/MEI importers from converter21 in place of the ones in music21... 108 # Comment out this line to go back to music21's built-in Humdrum/MEI importers. 
109 converter21.register() 110 111 badArg1: bool = False 112 badArg2: bool = False 113 score1Name: str | Path | None = None 114 score2Name: str | Path | None = None 115 116 # Convert input strings to Paths 117 if isinstance(score1, str): 118 score1Name = score1 119 try: 120 score1 = Path(score1) 121 except Exception: # pylint: disable=broad-exception-caught 122 print(f'score1 ({score1}) is not a valid path.', file=sys.stderr) 123 badArg1 = True 124 125 if isinstance(score2, str): 126 score2Name = score2 127 try: 128 score2 = Path(score2) 129 except Exception: # pylint: disable=broad-exception-caught 130 print(f'score2 ({score2}) is not a valid path.', file=sys.stderr) 131 badArg2 = True 132 133 if badArg1 or badArg2: 134 return None 135 136 if isinstance(score1, Path): 137 if not score1Name: 138 score1Name = score1 139 fileName1 = score1.name 140 fileExt1 = score1.suffix 141 142 if fileExt1 not in _getInputExtensionsList(): 143 print(f'score1 file extension ({fileExt1}) not supported by music21.', file=sys.stderr) 144 badArg1 = True 145 146 if not badArg1: 147 # pylint: disable=broad-except 148 try: 149 sc = m21.converter.parse( 150 score1, 151 forceSource=force_parse, 152 acceptSyntaxErrors=fix_first_file_syntax 153 ) 154 if t.TYPE_CHECKING: 155 assert isinstance(sc, m21.stream.Score) 156 score1 = sc 157 158 except Exception as e: 159 print(f'score1 ({fileName1}) could not be parsed by music21', file=sys.stderr) 160 print(e, file=sys.stderr) 161 badArg1 = True 162 # pylint: enable=broad-except 163 164 if isinstance(score2, Path): 165 if not score2Name: 166 score2Name = score2 167 fileName2: str = score2.name 168 fileExt2: str = score2.suffix 169 170 if fileExt2 not in _getInputExtensionsList(): 171 print(f'score2 file extension ({fileExt2}) not supported by music21.', file=sys.stderr) 172 badArg2 = True 173 174 if not badArg2: 175 # pylint: disable=broad-except 176 try: 177 sc = m21.converter.parse(score2, forceSource=force_parse) 178 if t.TYPE_CHECKING: 179 
assert isinstance(sc, m21.stream.Score) 180 score2 = sc 181 except Exception as e: 182 print(f'score2 ({fileName2}) could not be parsed by music21', file=sys.stderr) 183 print(e, file=sys.stderr) 184 badArg2 = True 185 # pylint: enable=broad-except 186 187 if badArg1 or badArg2: 188 return None 189 190 if t.TYPE_CHECKING: 191 assert isinstance(score1, m21.stream.Score) 192 assert isinstance(score2, m21.stream.Score) 193 194 # scan each score, producing an annotated wrapper 195 annotated_score1: AnnScore = AnnScore(score1, detail) 196 annotated_score2: AnnScore = AnnScore(score2, detail) 197 198 diff_list: list 199 cost: int 200 diff_list, cost = Comparison.annotated_scores_diff(annotated_score1, annotated_score2) 201 202 if cost != 0: 203 if visualize_diffs: 204 # you can change these three colors as you like... 205 # Visualization.INSERTED_COLOR = 'red' 206 # Visualization.DELETED_COLOR = 'red' 207 # Visualization.CHANGED_COLOR = 'red' 208 209 # color changed/deleted/inserted notes, add descriptive text for each change, etc 210 Visualization.mark_diffs(score1, score2, diff_list) 211 212 # ask music21 to display the scores as PDFs. Composer's name will be prepended with 213 # 'score1 ' and 'score2 ', respectively, so you can see which is which. 214 Visualization.show_diffs(score1, score2, out_path1, out_path2) 215 216 if print_omr_ned_output: 217 omr_ned_output: dict = Visualization.get_omr_ned_output( 218 cost, annotated_score1, annotated_score2 219 ) 220 jsonStr: str = json.dumps(omr_ned_output, indent=4) 221 print(jsonStr) 222 223 if print_text_output: 224 text_output: str = Visualization.get_text_output( 225 score1, score2, diff_list, score1Name=score1Name, score2Name=score2Name 226 ) 227 if text_output: 228 if print_omr_ned_output and print_text_output: 229 # put a blank line between them 230 print('') 231 print(text_output) 232 233 return cost
Compare two musical scores and optionally save/display the differences as two marked-up rendered PDFs.
Arguments:
- score1 (str, Path, music21.stream.Score): The first music score to compare. The score can be a file of any format readable by music21 (e.g. MusicXML, MEI, Humdrum, MIDI, etc), or a music21 Score object.
- score2 (str, Path, music21.stream.Score): The second musical score to compare. The score can be a file of any format readable by music21 (e.g. MusicXML, MEI, Humdrum, MIDI, etc), or a music21 Score object.
- out_path1 (str, Path): Where to save the first marked-up rendered score PDF. If out_path1 is None, both PDFs will be displayed in the default PDF viewer. (default is None)
- out_path2 (str, Path): Where to save the second marked-up rendered score PDF. If out_path2 is None, both PDFs will be displayed in the default PDF viewer. (default is None)
- force_parse (bool): Whether or not to force music21 to re-parse a file it has parsed previously. (default is True)
- visualize_diffs (bool): Whether or not to render diffs as marked up PDFs. If False, the only result of the call will be the return value (the number of differences). (default is True)
- print_text_output (bool): Whether or not to print diffs in diff-like text to stdout. (default is False)
- print_omr_ned_output (bool): Whether or not to print the OMR normalized edit distance (OMR-NED), which is computed as OMR edit distance divided by the total number of symbols in the two scores. (default is False)
- fix_first_file_syntax (bool): Whether to attempt to fix syntax errors in the first file (and add the number of such fixes to the returned OMR edit distance). (default is False)
- detail (DetailLevel | int): What level of detail to use during the diff. Can be DecoratedNotesAndRests, OtherObjects, AllObjects, Default (currently AllObjects), or any combination (with | or &~) of those or NotesAndRests, Beams, Tremolos, Ornaments, Articulations, Ties, Slurs, Signatures, Directions, Barlines, StaffDetails, ChordSymbols, Ottavas, Arpeggios, Lyrics, Style, Metadata, or Voicing.
Returns:
int | None: The total OMR Edit Distance, i.e. the number of individual symbols that must be added or deleted. (0 means that the scores were identical, and None means that one or more of the input files failed to parse.)
def
diff_ml_training( predicted_folder: str, ground_truth_folder: str, output_folder: str, detail: musicdiff.detaillevel.DetailLevel | int = musicdiff.detaillevel.DetailLevel.AllObjects) -> tuple[float, str]:
316def diff_ml_training( 317 predicted_folder: str, 318 ground_truth_folder: str, 319 output_folder: str, 320 detail: DetailLevel | int = DetailLevel.Default, 321) -> tuple[float, str]: 322 ''' 323 Compare two folders of musical scores, and produce a CSV spreadsheet of results, including 324 the overall OMR-NED score for the batch. 325 326 Args: 327 predicted_folder (str): The folder full of predicted scores. The scores 328 can be of any format readable by music21 (e.g. MusicXML, MEI, Humdrum, etc). 329 Each score must have the exact same filename as the corresponding ground 330 truth score. 331 ground_truth_folder (str): The folder full of ground truth scores. Each score must 332 have the exact same filename as the corresponding predicted score. 333 output_folder (str): The folder in which to save the output spreadsheet (output.csv). 334 detail (DetailLevel | int): What level of detail to use during the comparisons. 335 Can be DecoratedNotesAndRests, OtherObjects, AllObjects, Default (currently 336 AllObjects), or any combination (with | or &~) of those or NotesAndRests, 337 Beams, Tremolos, Ornaments, Articulations, Ties, Slurs, Signatures, 338 Directions, Barlines, StaffDetails, ChordSymbols, Ottavas, Arpeggios, Lyrics, 339 Style, Metadata, or Voicing. 340 341 Returns: 342 tuple[float, str]: Overall OMR-NED score for the batch, and the full path to the 343 output spreadsheet (output.csv). 
344 ''' 345 346 converter21.register() 347 348 output_file_path: str = output_folder + '/output.csv' 349 350 # expand tildes 351 predicted_folder = os.path.expanduser(predicted_folder) 352 ground_truth_folder = os.path.expanduser(ground_truth_folder) 353 output_folder = os.path.expanduser(output_folder) 354 355 metrics_list: list[EvaluationMetrics] = [] 356 for name in os.listdir(predicted_folder): 357 predpath: str = os.path.join(predicted_folder, name) 358 359 # check if it is a file 360 if not os.path.isfile(predpath): 361 continue 362 363 # check if there is a same-named file in ground_truth_folder 364 gtpath: str = os.path.join(ground_truth_folder, name) 365 if not os.path.isfile(gtpath): 366 continue 367 368 metrics: EvaluationMetrics | None = _diff_omr_ned_metrics( 369 predpath=predpath, gtpath=gtpath, detail=detail 370 ) 371 if metrics is None: 372 continue 373 374 # append metrics to metrics_list 375 metrics_list.append(metrics) 376 377 # sort metrics_list the way you want it to appear in the csv file. 378 # I like it sorted by omr_ned (ascending), so the omr_ned == 0.0 entries 379 # are together at the top, and the omr_ned == 1.0 entries are together 380 # at the bottom. Within each group of "same omr_ned", sort by filename. 381 def natsortkey(path: str): 382 # splits path into chunks of digits and non-digits. Converts the digit 383 # chunks to integers for numerical comparison and the non-digit chunks 384 # to lowercase for case-insensitive comparison. 
385 key: list[int | str] = [] 386 for chunk in re.split(r'(\d+)', path): 387 if chunk.isdigit(): 388 key.append(int(chunk)) 389 else: 390 key.append(chunk.lower()) 391 return key 392 393 metrics_list.sort(key=lambda m: (m.omr_ned, natsortkey(str(m.gt_path)))) 394 with open(output_file_path, 'wt', encoding='utf-8') as outf: 395 print(Visualization.get_output_csv_header(detail), file=outf) 396 397 for metrics in metrics_list: 398 # append CSV line to output file 399 # (gt path, pred path, gt numsyms, pred numsyms, sym edit cost, omr_ned 400 print(Visualization.get_output_csv_line(metrics, detail), file=outf) 401 402 # append overall score to output file (currently average SER) 403 total_gt_numsyms: int = 0 404 total_pred_numsyms: int = 0 405 total_omr_edit_distance: int = 0 406 if metrics_list: 407 for metrics in metrics_list: 408 total_gt_numsyms += metrics.gt_numsyms 409 total_pred_numsyms += metrics.pred_numsyms 410 total_omr_edit_distance += metrics.omr_edit_distance 411 412 overall_score: float = Visualization.get_omr_ned( 413 total_omr_edit_distance, total_pred_numsyms, total_gt_numsyms 414 ) 415 416 print(Visualization.get_output_csv_trailer(metrics_list, detail), file=outf) 417 outf.flush() 418 419 return overall_score, output_file_path
Compare two folders of musical scores, and produce a CSV spreadsheet of results, including the overall OMR-NED score for the batch.
Arguments:
- predicted_folder (str): The folder full of predicted scores. The scores can be of any format readable by music21 (e.g. MusicXML, MEI, Humdrum, etc). Each score must have the exact same filename as the corresponding ground truth score.
- ground_truth_folder (str): The folder full of ground truth scores. Each score must have the exact same filename as the corresponding predicted score.
- output_folder (str): The folder in which to save the output spreadsheet (output.csv).
- detail (DetailLevel | int): What level of detail to use during the comparisons. Can be DecoratedNotesAndRests, OtherObjects, AllObjects, Default (currently AllObjects), or any combination (with | or &~) of those or NotesAndRests, Beams, Tremolos, Ornaments, Articulations, Ties, Slurs, Signatures, Directions, Barlines, StaffDetails, ChordSymbols, Ottavas, Arpeggios, Lyrics, Style, Metadata, or Voicing.
Returns:
tuple[float, str]: Overall OMR-NED score for the batch, and the full path to the output spreadsheet (output.csv).