Enhance Markdown parsing: support escaped asterisks and improve segment handling

This commit is contained in:
Ignacio Rivero 2025-06-22 02:37:03 -03:00
parent b59f77fee5
commit 65fa1c1497

16
app.py
View File

@@ -286,6 +286,10 @@ def parse_line(line):
return ('text', parse_segments(line)) return ('text', parse_segments(line))
def parse_segments(line): def parse_segments(line):
# Handle escaped asterisks: replace them with a placeholder
line = line.replace(r'\*', '\x07')
# Apply formatting
bi = re.findall(r"\*\*\*(.+?)\*\*\*", line) bi = re.findall(r"\*\*\*(.+?)\*\*\*", line)
for x in bi: for x in bi:
line = line.replace(f"***{x}***", f"\x01{x}\x02") line = line.replace(f"***{x}***", f"\x01{x}\x02")
@@ -295,39 +299,37 @@ def parse_segments(line):
i = re.findall(r"\*(.+?)\*", line) i = re.findall(r"\*(.+?)\*", line)
for x in i: for x in i:
line = line.replace(f"*{x}*", f"\x05{x}\x06") line = line.replace(f"*{x}*", f"\x05{x}\x06")
# Split into styled segments
segments = [] segments = []
i = 0 i = 0
style = 'text'
while i < len(line): while i < len(line):
if line[i] == '\x01': if line[i] == '\x01':
style = 'bolditalic'
i += 1 i += 1
start = i start = i
while i < len(line) and line[i] != '\x02': i += 1 while i < len(line) and line[i] != '\x02': i += 1
segments.append(('bolditalic', line[start:i])) segments.append(('bolditalic', line[start:i]))
style = 'text'
i += 1 i += 1
elif line[i] == '\x03': elif line[i] == '\x03':
style = 'bold'
i += 1 i += 1
start = i start = i
while i < len(line) and line[i] != '\x04': i += 1 while i < len(line) and line[i] != '\x04': i += 1
segments.append(('bold', line[start:i])) segments.append(('bold', line[start:i]))
style = 'text'
i += 1 i += 1
elif line[i] == '\x05': elif line[i] == '\x05':
style = 'italic'
i += 1 i += 1
start = i start = i
while i < len(line) and line[i] != '\x06': i += 1 while i < len(line) and line[i] != '\x06': i += 1
segments.append(('italic', line[start:i])) segments.append(('italic', line[start:i]))
style = 'text'
i += 1 i += 1
else: else:
start = i start = i
while i < len(line) and line[i] not in '\x01\x03\x05': i += 1 while i < len(line) and line[i] not in '\x01\x03\x05': i += 1
if i > start: if i > start:
segments.append(('text', line[start:i])) segments.append(('text', line[start:i]))
# Restore literal asterisks
segments = [(style, text.replace('\x07', '*')) for style, text in segments]
return segments return segments
def font_for_style(style, font, font_bold, font_italic, font_bolditalic): def font_for_style(style, font, font_bold, font_italic, font_bolditalic):