fix: collect and report errors during parsing and command processing
This commit is contained in:
parent
974a0b44aa
commit
91caba2091
1 changed file with 71 additions and 30 deletions
101
merge_srt.py
101
merge_srt.py
|
@ -4,54 +4,85 @@ import json
|
||||||
|
|
||||||
class SRTMerger:
|
class SRTMerger:
|
||||||
def __init__(self, srt1_filename, srt2_filename, commands_filename, output_filename):
    """Load both SRT inputs and the command file, collecting every problem found.

    Nothing is raised for unreadable or malformed inputs; the messages
    returned by ``parse_srt`` and ``read_commands`` are accumulated in
    ``self.errors`` so the caller can decide whether to continue.

    Args:
        srt1_filename: Path of the first SRT file (stored as ``self.srt1``).
        srt2_filename: Path of the second SRT file (stored as ``self.srt2``).
        commands_filename: Path of the command file (stored as ``self.commands``).
        output_filename: Destination kept verbatim in ``self.output_filename``.
    """
    # Single list that receives messages from every loading step, in order.
    self.errors = []

    self.srt1, srt1_errors = self.parse_srt(srt1_filename)
    self.errors.extend(srt1_errors)

    self.srt2, srt2_errors = self.parse_srt(srt2_filename)
    self.errors.extend(srt2_errors)

    self.commands, cmd_errors = self.read_commands(commands_filename)
    self.errors.extend(cmd_errors)

    # Entries produced by the command handlers are appended here.
    self.output = []
    self.output_filename = output_filename
|
||||||
|
|
||||||
def parse_srt(self, filename):
    """Parse an SRT file into subtitle entries, collecting problems instead of raising.

    Args:
        filename: Path of the SRT file. It is decoded as UTF-8 and a leading
            byte-order mark is dropped (``utf-8-sig``).

    Returns:
        A ``(entries, errors)`` tuple.

        ``entries`` is a list of dicts with the keys ``'timestamp'`` (the
        second line of a block, verbatim) and ``'text'`` (all remaining lines
        joined with ``'\\n'`` and stripped). The first line of each block is
        not used.

        ``errors`` is a list of messages: one if the file cannot be read or
        decoded, and one for every block that has fewer than three lines.
    """
    entries = []
    errors = []
    try:
        with open(filename, 'r', encoding='utf-8-sig') as f:
            content = f.read()
    except (OSError, UnicodeDecodeError) as e:
        # UnicodeDecodeError is a ValueError, not an OSError, so it has to be
        # named explicitly or a badly encoded file would crash the caller.
        errors.append(f"Error reading {filename}: {e}")
        return entries, errors

    # Group consecutive non-blank lines into blocks. Splitting on the literal
    # '\n\n' breaks when cues are separated by more than one blank line (the
    # following block then starts with an empty line and its index is taken
    # for the timestamp) and reports a bogus block for an empty file.
    blocks = []
    current = []
    for line in content.split('\n'):
        if line.strip():
            current.append(line)
        elif current:
            blocks.append(current)
            current = []
    if current:
        blocks.append(current)

    for block_number, lines in enumerate(blocks, start=1):
        if len(lines) < 3:
            errors.append(f"Block {block_number} in {filename} has less than 3 lines.")
            continue
        entries.append({
            'timestamp': lines[1],
            'text': '\n'.join(lines[2:]).strip(),
        })
    return entries, errors
|
||||||
|
|
||||||
def read_commands(self, filename):
    """Read the command file and parse every command line, collecting problems.

    Blank lines and lines starting with ``#`` are ignored. Every other line is
    tokenised with ``shlex.split``; the first token, upper-cased, selects one
    of the ``parse_copy`` / ``parse_map`` / ``parse_sync`` / ``parse_put``
    methods, which receives the full token list.

    Args:
        filename: Path of the command file, decoded as UTF-8.

    Returns:
        A ``(commands, errors)`` tuple. ``commands`` holds every non-``None``
        value returned by the per-command parsers, in file order. ``errors``
        holds one message per unreadable file, untokenisable line, unknown
        command, or command whose parser returned ``None``; line numbers in
        the messages are 1-based.
    """
    commands = []
    errors = []
    try:
        with open(filename, 'r', encoding='utf-8') as f:
            lines = f.readlines()
    except (OSError, UnicodeDecodeError) as e:
        # UnicodeDecodeError is not an OSError; catch it so a badly encoded
        # file is reported like any other unreadable file.
        errors.append(f"Error reading {filename}: {e}")
        return commands, errors

    # Command keyword -> parser; replaces a chain of identical if/elif arms.
    parsers = {
        'COPY': self.parse_copy,
        'MAP': self.parse_map,
        'SYNC': self.parse_sync,
        'PUT': self.parse_put,
    }

    for line_number, line in enumerate(lines, start=1):
        line = line.strip()
        if not line or line.startswith('#'):
            continue
        try:
            parts = shlex.split(line)
        except ValueError as e:
            # shlex raises ValueError on e.g. an unterminated quote; record it
            # instead of aborting the whole run with a traceback.
            errors.append(f"Line {line_number}: cannot split command ({e}).")
            continue
        if not parts:
            errors.append(f"Line {line_number}: empty command after splitting.")
            continue
        command = parts[0].upper()
        parser = parsers.get(command)
        if parser is None:
            errors.append(f"Line {line_number}: unknown command '{command}'.")
            continue
        parsed = parser(parts)
        if parsed is None:
            errors.append(f"Line {line_number}: invalid {command} command.")
            continue
        commands.append(parsed)
    return commands, errors
|
||||||
|
|
||||||
def parse_copy(self, parts):
|
def parse_copy(self, parts):
|
||||||
if len(parts) != 3:
|
if len(parts) != 3:
|
||||||
|
@ -114,7 +145,6 @@ class SRTMerger:
|
||||||
return None
|
return None
|
||||||
text = data['text']
|
text = data['text']
|
||||||
except (ValueError, json.JSONDecodeError, TypeError) as e:
|
except (ValueError, json.JSONDecodeError, TypeError) as e:
|
||||||
print(f"Error parsing PUT command: {e}")
|
|
||||||
return None
|
return None
|
||||||
return ('PUT', source, index, text)
|
return ('PUT', source, index, text)
|
||||||
|
|
||||||
|
@ -128,13 +158,12 @@ class SRTMerger:
|
||||||
self.handle_sync(*cmd[1:])
|
self.handle_sync(*cmd[1:])
|
||||||
elif cmd[0] == 'PUT':
|
elif cmd[0] == 'PUT':
|
||||||
self.handle_put(*cmd[1:])
|
self.handle_put(*cmd[1:])
|
||||||
self.write_output()
|
|
||||||
|
|
||||||
def handle_copy(self, source, start, end):
    """Append entries ``start``..``end`` (1-based, inclusive) of one input to the output.

    Args:
        source: ``1`` selects ``self.srt1``; any other value selects ``self.srt2``.
        start: 1-based index of the first entry to copy.
        end: 1-based index of the last entry to copy.

    An out-of-range or reversed range copies nothing; a message is appended to
    ``self.errors`` instead.
    """
    source_list = self.srt1 if source == 1 else self.srt2
    start_idx, end_idx = start - 1, end - 1
    # Valid only when 0 <= start <= end < len, expressed on 0-based indices.
    if not (0 <= start_idx <= end_idx < len(source_list)):
        self.errors.append(f"Invalid COPY command: source {source}, range {start}-{end}.")
        return
    self.output.extend(source_list[start_idx:end_idx + 1])
|
||||||
|
|
||||||
|
@ -146,7 +175,7 @@ class SRTMerger:
|
||||||
time_start_idx = time_start - 1
|
time_start_idx = time_start - 1
|
||||||
if (text_start_idx < 0 or text_start_idx + count > len(text_list) or
|
if (text_start_idx < 0 or text_start_idx + count > len(text_list) or
|
||||||
time_start_idx < 0 or time_start_idx + count > len(time_list)):
|
time_start_idx < 0 or time_start_idx + count > len(time_list)):
|
||||||
print(f"Skipping invalid MAP command: source {text_source}, text start {text_start}, time start {time_start}, count {count}")
|
self.errors.append(f"Invalid MAP command: source {text_source}, text start {text_start}, time start {time_start}, count {count}.")
|
||||||
return
|
return
|
||||||
for i in range(count):
|
for i in range(count):
|
||||||
text_entry = text_list[text_start_idx + i]
|
text_entry = text_list[text_start_idx + i]
|
||||||
|
@ -164,7 +193,7 @@ class SRTMerger:
|
||||||
prev_time_idx = time_start_idx - 1
|
prev_time_idx = time_start_idx - 1
|
||||||
if (text_start_idx < 0 or text_start_idx >= len(text_list) or
|
if (text_start_idx < 0 or text_start_idx >= len(text_list) or
|
||||||
time_start_idx < 1 or prev_time_idx < 0 or time_start_idx >= len(time_list)):
|
time_start_idx < 1 or prev_time_idx < 0 or time_start_idx >= len(time_list)):
|
||||||
print(f"Skipping invalid SYNC command: text index {text_index} must be >=1 and <= {len(text_list)}, time index {time_index} must be >=2 and <= {len(time_list)}")
|
self.errors.append(f"Invalid SYNC command: text index {text_index} must be >=1 and <= {len(text_list)}, time index {time_index} must be >=2 and <= {len(time_list)}.")
|
||||||
return
|
return
|
||||||
text_entry = text_list[text_start_idx]
|
text_entry = text_list[text_start_idx]
|
||||||
time_entry = time_list[time_start_idx]
|
time_entry = time_list[time_start_idx]
|
||||||
|
@ -180,7 +209,7 @@ class SRTMerger:
|
||||||
source_list = self.srt1 if source == 1 else self.srt2
|
source_list = self.srt1 if source == 1 else self.srt2
|
||||||
index_idx = index - 1
|
index_idx = index - 1
|
||||||
if index_idx < 0 or index_idx >= len(source_list):
|
if index_idx < 0 or index_idx >= len(source_list):
|
||||||
print(f"Skipping invalid PUT command: source {source}, index {index} is out of bounds")
|
self.errors.append(f"Invalid PUT command: source {source}, index {index} is out of bounds.")
|
||||||
return
|
return
|
||||||
entry = source_list[index_idx]
|
entry = source_list[index_idx]
|
||||||
new_entry = {
|
new_entry = {
|
||||||
|
@ -193,8 +222,11 @@ class SRTMerger:
|
||||||
if self.output_filename == '-':
|
if self.output_filename == '-':
|
||||||
out = sys.stdout
|
out = sys.stdout
|
||||||
else:
|
else:
|
||||||
out = open(self.output_filename, 'w', encoding='utf-8')
|
try:
|
||||||
|
out = open(self.output_filename, 'w', encoding='utf-8')
|
||||||
|
except IOError as e:
|
||||||
|
print(f"Error writing to {self.output_filename}: {e}", file=sys.stderr)
|
||||||
|
return
|
||||||
try:
|
try:
|
||||||
for i, entry in enumerate(self.output, start=1):
|
for i, entry in enumerate(self.output, start=1):
|
||||||
out.write(f"{i}\n{entry['timestamp']}\n{entry['text'].strip()}\n\n")
|
out.write(f"{i}\n{entry['timestamp']}\n{entry['text'].strip()}\n\n")
|
||||||
|
@ -245,8 +277,17 @@ class SRTMerger:
|
||||||
|
|
||||||
if __name__ == '__main__':
    # Script entry point: merge_srt.py <srt1> <srt2> <commands> <output>
    if len(sys.argv) != 5:
        print("Usage: python merge_srt.py <srt1> <srt2> <commands> <output>", file=sys.stderr)
        sys.exit(1)
    srt1_filename, srt2_filename, commands_filename, output_filename = sys.argv[1:5]
    merger = SRTMerger(srt1_filename, srt2_filename, commands_filename, output_filename)

    def _exit_if_errors():
        """Print every collected error to stderr and exit with status 1, if any exist."""
        if merger.errors:
            for error in merger.errors:
                print(error, file=sys.stderr)
            sys.exit(1)

    # Stop before processing if loading the inputs already failed ...
    _exit_if_errors()
    merger.process_commands()
    # ... and before writing if any command was rejected.
    _exit_if_errors()
    merger.write_output()
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue