feat: add SYNC command to merge entries with delta
This commit is contained in:
parent
07ef135a82
commit
50d8e6d261
1 changed file with 72 additions and 25 deletions
97
merge_srt.py
97
merge_srt.py
|
@ -63,18 +63,60 @@ def read_commands(filename):
|
||||||
except (ValueError, AttributeError):
|
except (ValueError, AttributeError):
|
||||||
continue
|
continue
|
||||||
commands.append(('MAP', text_source, text_start, time_source, time_start, count))
|
commands.append(('MAP', text_source, text_start, time_source, time_start, count))
|
||||||
|
elif command == 'SYNC':
|
||||||
|
if len(parts) != 3:
|
||||||
|
continue
|
||||||
|
try:
|
||||||
|
source1_index = int(parts[1])
|
||||||
|
source2_index = int(parts[2])
|
||||||
|
except (ValueError, AttributeError):
|
||||||
|
continue
|
||||||
|
commands.append(('SYNC', source1_index, source2_index))
|
||||||
else:
|
else:
|
||||||
continue
|
continue
|
||||||
return commands
|
return commands
|
||||||
|
|
||||||
|
def parse_timestamp(ts_str):
    """Parse an SRT timing line into a ``(start_ms, end_ms)`` tuple.

    Args:
        ts_str: Timing line such as ``"00:01:02,345 --> 00:01:04,000"``.
            Whitespace around the arrow is optional, ``.`` is accepted in
            place of ``,`` before the milliseconds, and anything following
            the end time (for example cue position settings) is ignored.

    Returns:
        Tuple of two ints: the start and end time in milliseconds.

    Raises:
        ValueError: If the line is not a recognisable timing line.
    """
    def parse_part(part):
        # "HH:MM:SS,mmm" -> milliseconds. int() tolerates stray whitespace,
        # so the pieces do not need to be stripped individually.
        clock, _, millis = part.replace('.', ',').partition(',')
        hours, minutes, seconds = (int(field) for field in clock.split(':'))
        return hours * 3600000 + minutes * 60000 + seconds * 1000 + int(millis)

    start_str, arrow, remainder = ts_str.partition('-->')
    # Only the first whitespace-delimited token after the arrow is the end
    # time; later tokens are optional cue settings.
    end_fields = remainder.split()
    if not arrow or not end_fields:
        raise ValueError(f"Malformed SRT timestamp: {ts_str!r}")
    try:
        return parse_part(start_str), parse_part(end_fields[0])
    except ValueError as err:
        raise ValueError(f"Malformed SRT timestamp: {ts_str!r}") from err
|
||||||
|
|
||||||
|
def compute_delta(prev_ts_str, curr_ts_str):
    """Return the gap in milliseconds between two timing lines' start times.

    Args:
        prev_ts_str: Timing line of the earlier entry.
        curr_ts_str: Timing line of the later entry.

    Returns:
        Start of ``curr_ts_str`` minus start of ``prev_ts_str``, in
        milliseconds (negative when the "current" entry starts first).
    """
    # Parse in argument order; only the start component of each is needed.
    starts = [parse_timestamp(line)[0] for line in (prev_ts_str, curr_ts_str)]
    return starts[1] - starts[0]
|
||||||
|
|
||||||
|
def format_time(ms):
    """Break a millisecond count into clock components.

    Args:
        ms: Integer number of milliseconds.

    Returns:
        Tuple ``(hours, minutes, seconds, milliseconds)``. Hours are not
        wrapped at 24. The split uses floor division, so no range check
        is applied to the input.
    """
    whole_seconds, ms_part = divmod(ms, 1000)
    whole_minutes, seconds = divmod(whole_seconds, 60)
    hours, minutes = divmod(whole_minutes, 60)
    return hours, minutes, seconds, ms_part
|
||||||
|
|
||||||
|
def format_timestamp(start_ms, end_ms):
    """Render a start/end pair of millisecond values as an SRT timing line.

    Args:
        start_ms: Start time in milliseconds.
        end_ms: End time in milliseconds.

    Returns:
        A string of the form ``"HH:MM:SS,mmm --> HH:MM:SS,mmm"``.
    """
    rendered = []
    for moment in (start_ms, end_ms):
        hh, mm, ss, millis = format_time(moment)
        # Zero-pad to the fixed SRT field widths (2/2/2/3 digits).
        rendered.append(f"{hh:02}:{mm:02}:{ss:02},{millis:03}")
    return " --> ".join(rendered)
|
||||||
|
|
||||||
|
def add_delta_to_timestamp(ts_str, delta):
    """Shift both ends of an SRT timing line by ``delta`` milliseconds.

    Args:
        ts_str: Timing line understood by ``parse_timestamp``.
        delta: Offset in milliseconds; may be negative.

    Returns:
        The shifted timing line as produced by ``format_timestamp``. A
        boundary that would fall before zero is clamped to
        ``00:00:00,000``.
    """
    start, end = parse_timestamp(ts_str)
    # SRT has no notation for negative times. Without the clamp, a large
    # negative shift would be written out as a malformed timestamp with a
    # negative hour field.
    return format_timestamp(max(start + delta, 0), max(end + delta, 0))
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
if len(sys.argv) != 5:
|
if len(sys.argv) != 5:
|
||||||
print("Usage: python merge_srt.py <srt1> <srt2> <commands> <output>")
|
print("Usage: python merge_srt.py <srt1> <srt2> <commands> <output>")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
srt1_filename = sys.argv[1]
|
srt1_filename, srt2_filename, commands_filename, output_filename = sys.argv[1:5]
|
||||||
srt2_filename = sys.argv[2]
|
|
||||||
commands_filename = sys.argv[3]
|
|
||||||
output_filename = sys.argv[4]
|
|
||||||
|
|
||||||
srt1 = parse_srt(srt1_filename)
|
srt1 = parse_srt(srt1_filename)
|
||||||
srt2 = parse_srt(srt2_filename)
|
srt2 = parse_srt(srt2_filename)
|
||||||
|
@ -83,42 +125,47 @@ if __name__ == '__main__':
|
||||||
output = []
|
output = []
|
||||||
for cmd in commands:
|
for cmd in commands:
|
||||||
if cmd[0] == 'COPY':
|
if cmd[0] == 'COPY':
|
||||||
_, source, start, end = cmd
|
source, start, end = cmd[1:]
|
||||||
source_list = srt1 if source == 1 else srt2
|
source_list = srt1 if source == 1 else srt2
|
||||||
start_idx = start - 1
|
start_idx, end_idx = start - 1, end - 1
|
||||||
end_idx = end - 1
|
|
||||||
if start_idx < 0 or end_idx >= len(source_list) or start_idx > end_idx:
|
if start_idx < 0 or end_idx >= len(source_list) or start_idx > end_idx:
|
||||||
print(f"Skipping invalid COPY command: source {source}, range {start}-{end}")
|
print(f"Skipping invalid COPY command: source {source}, range {start}-{end}")
|
||||||
continue
|
continue
|
||||||
entries = source_list[start_idx:end_idx + 1]
|
output.extend(source_list[start_idx:end_idx + 1])
|
||||||
output.extend(entries)
|
|
||||||
elif cmd[0] == 'MAP':
|
elif cmd[0] == 'MAP':
|
||||||
_, text_source, text_start, time_source, time_start, count = cmd
|
text_source, text_start, time_source, time_start, count = cmd[1:]
|
||||||
text_list = srt1 if text_source == 1 else srt2
|
text_list = srt1 if text_source == 1 else srt2
|
||||||
time_list = srt1 if time_source == 1 else srt2
|
time_list = srt1 if time_source == 1 else srt2
|
||||||
text_start_idx = text_start - 1
|
text_start_idx = text_start - 1
|
||||||
time_start_idx = time_start - 1
|
time_start_idx = time_start - 1
|
||||||
if text_start_idx < 0 or text_start_idx + count > len(text_list):
|
if (text_start_idx < 0 or text_start_idx + count > len(text_list) or
|
||||||
print(f"Skipping invalid MAP command: text source {text_source}, start {text_start}, count {count}")
|
time_start_idx < 0 or time_start_idx + count > len(time_list)):
|
||||||
continue
|
print(f"Skipping invalid MAP command: text source {text_source}, start {text_start}, count {count} or time source {time_source}, start {time_start}, count {count}")
|
||||||
if time_start_idx < 0 or time_start_idx + count > len(time_list):
|
|
||||||
print(f"Skipping invalid MAP command: time source {time_source}, start {time_start}, count {count}")
|
|
||||||
continue
|
continue
|
||||||
for i in range(count):
|
for i in range(count):
|
||||||
text_entry = text_list[text_start_idx + i]
|
text_entry = text_list[text_start_idx + i]
|
||||||
time_entry = time_list[time_start_idx + i]
|
time_entry = time_list[time_start_idx + i]
|
||||||
new_entry = {
|
output.append({
|
||||||
'index': len(output) + 1,
|
'index': len(output) + 1,
|
||||||
'timestamp': time_entry['timestamp'],
|
'timestamp': time_entry['timestamp'],
|
||||||
'text': text_entry['text'],
|
'text': text_entry['text'],
|
||||||
}
|
})
|
||||||
output.append(new_entry)
|
elif cmd[0] == 'SYNC':
|
||||||
|
source1_index, source2_index = cmd[1:]
|
||||||
|
if source1_index < 2 or source1_index > len(srt1) or source2_index < 1 or source2_index > len(srt2):
|
||||||
|
print(f"Skipping invalid SYNC command: source1 index {source1_index} must be >=2 and <= {len(srt1)}, source2 index {source2_index} must be >=1 and <= {len(srt2)}")
|
||||||
|
continue
|
||||||
|
entry_prev = srt1[source1_index - 2]
|
||||||
|
entry1 = srt1[source1_index - 1]
|
||||||
|
entry2 = srt2[source2_index - 1]
|
||||||
|
delta = compute_delta(entry_prev['timestamp'], entry1['timestamp'])
|
||||||
|
new_ts = add_delta_to_timestamp(entry2['timestamp'], delta)
|
||||||
|
output.append({
|
||||||
|
'index': len(output) + 1,
|
||||||
|
'timestamp': new_ts,
|
||||||
|
'text': entry2['text'],
|
||||||
|
})
|
||||||
|
|
||||||
with open(output_filename, 'w', encoding='utf-8') as f:
|
with open(output_filename, 'w', encoding='utf-8') as f:
|
||||||
for entry in output:
|
for i, entry in enumerate(output, start=1):
|
||||||
f.write(f"{entry['index']}\n")
|
f.write(f"{i}\n{entry['timestamp']}\n{entry['text'].strip()}\n\n")
|
||||||
f.write(entry['timestamp'] + '\n')
|
|
||||||
text = entry['text'].strip()
|
|
||||||
for line in text.split('\n'):
|
|
||||||
f.write(line + '\n')
|
|
||||||
f.write('\n')
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue