Updated the tagger to scan all files.

This commit is contained in:
Bokuan Li
2025-12-21 03:10:32 -05:00
parent c021e0f27b
commit ba0cf9a315
2 changed files with 39 additions and 33 deletions

View File

@@ -4,64 +4,70 @@ import re
# Tag alphabet: the ten digits followed by the uppercase letters,
# no I, no O
CHARACTERS = "0123456789ABCDEFGHJKLMNPQRSTUVWXYZ"


# convert integer to tag
def tobase(i):
    """Return the non-negative integer ``i`` written in base ``len(CHARACTERS)``.

    Digits are taken from ``CHARACTERS``, most significant first.  The result
    is not padded: ``0`` gives ``"0"`` and ``len(CHARACTERS)`` gives ``"10"``.

    Raises:
        ValueError: if ``i`` is negative.
    """
    # An explicit check instead of ``assert``: assertions are removed under
    # ``python -O``, and a negative index would then silently read
    # CHARACTERS from the end.
    if i < 0:
        raise ValueError("cannot convert a negative integer to a tag: %r" % (i,))

    base = len(CHARACTERS)
    digits = []
    # Peel off the least significant digit until nothing is left; the loop
    # body runs at least once so that 0 still yields one digit.
    while True:
        i, remainder = divmod(i, base)
        digits.append(CHARACTERS[remainder])
        if i == 0:
            break
    return "".join(reversed(digits))
def totag(i):
    """Return ``tobase(i)`` left-padded with ``"0"`` to four characters.

    Strings that are already four characters or longer come back unchanged.
    """
    digits = tobase(i)
    # A non-positive repeat count yields "", so long values get no padding.
    padding = "0" * (4 - len(digits))
    return padding + digits
# convert tag to integer
def toint(tag):
    """Return the integer that ``tag`` encodes in base ``len(CHARACTERS)``.

    The tag is read most significant character first and may have any
    length, so values that need more than four characters decode correctly
    instead of losing their trailing characters.  Leading ``"0"`` characters
    contribute nothing; an empty tag yields 0.

    Raises:
        ValueError: if ``tag`` contains a character that is not in
            ``CHARACTERS``.
    """
    base = len(CHARACTERS)
    value = 0
    # Horner's scheme: shift the running value one digit left, then add the
    # next digit.
    for character in tag:
        value = value * base + CHARACTERS.index(character)
    return value
# Bookkeeping read from the "tags" file in the current directory:
#   tags     -- first field -> second field of every non-"#" line
#   labels   -- the reverse mapping, second field -> first field
#   inactive -- first fields of "#" lines that look like a tag entry
tags = dict()
labels = dict()
inactive = []

try:
    with open("tags") as f:
        for line in f:
            # Split once per line rather than once per field access.
            fields = line.strip().split(",")

            # A blank line holds no entry.  Without this guard the lookup of
            # fields[1] below raises IndexError and aborts the run.
            if fields == [""]:
                continue

            # actual tag
            if not line.startswith("#"):
                # Both mappings use the stripped fields so they always agree.
                tag, label = fields[0], fields[1]
                tags[tag] = label
                labels[label] = tag

            # check for inactive tags too
            # NOTE(review): the first field still contains the leading "#",
            # so the length test counts it and the stored entry keeps it --
            # confirm this matches the intended format of inactive lines.
            elif len(fields) == 2 and len(fields[0]) == 4:
                inactive.append(fields[0])
except FileNotFoundError:
    # No tags file yet: carry on with the empty tables.
    pass
# determine last assigned tag
# Every entry read from the tags file, active and inactive alike.
known = list(tags) + inactive
if known:
    # The greatest entry in string order.  The emptiness test is explicit so
    # that an error raised inside toint() for a malformed entry is reported
    # rather than being mistaken for "no tags yet", which would restart the
    # numbering and hand out tags that are already taken.
    last = toint(max(known))
else:
    # Nothing recorded yet; -1 makes last + 1 equal to 0.
    last = -1
# Scan every .tex file in the current directory.  glob returns its matches
# in arbitrary order, so sort them to make the assignment of tags
# reproducible from run to run.  (To tag a single file, use an explicit list
# such as ["document.tex"] instead.)
filenames = sorted(glob.glob("*.tex"))

# where we should start
i = last + 1

# Matches \label{...} and captures the text between the braces.  Compiled
# once here instead of being looked up again for every line.
label_pattern = re.compile(r"\\label{([^}]+)}")

# Labels that received a tag during this run, so a label occurring more than
# once in the scanned files is printed with a single tag only.
assigned = set()

for filename in filenames:
    with open(filename) as f:
        # do this line per line to deal with comments
        for line in f:
            # Keep only the text before the first "%".  Note that this cuts
            # at an escaped "\%" as well.
            code = line.split("%")[0]
            for label in label_pattern.findall(code):
                if label in labels or label in assigned:
                    continue
                # Same "tag,label" line format that the tags file uses.
                print("%s,%s" % (totag(i), label))
                assigned.add(label)
                i = i + 1