Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Docstring description multiline parsing #476

Open
wants to merge 12 commits into
base: master
Choose a base branch
from
125 changes: 119 additions & 6 deletions fire/docstrings.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,12 +177,13 @@ def parse(docstring):
state.returns.lines = []
state.yields.lines = []
state.raises.lines = []
state.max_line_length = max(len(line) for line in lines)

for index, line in enumerate(lines):
has_next = index + 1 < lines_len
previous_line = lines[index - 1] if index > 0 else None
next_line = lines[index + 1] if has_next else None
line_info = _create_line_info(line, next_line, previous_line)
line_info = _create_line_info(line, next_line, previous_line, index)
_consume_line(line_info, state)

summary = ' '.join(state.summary.lines) if state.summary.lines else None
Expand Down Expand Up @@ -269,7 +270,7 @@ def _join_lines(lines):
group_lines = []

if group_lines: # Process the final group.
group_text = ' '.join(group_lines)
group_text = '\n'.join(group_lines)
group_texts.append(group_text)

return '\n\n'.join(group_texts)
Expand All @@ -296,6 +297,11 @@ def _get_or_create_arg_by_name(state, name, is_kwarg=False):
arg.name = name
arg.type.lines = []
arg.description.lines = []
arg.line1_index = None
arg.line1_length = None
arg.line2_first_word_length = None
arg.line2_length = None
arg.line3_first_word_length = None
if is_kwarg:
state.kwargs.append(arg)
else:
Expand Down Expand Up @@ -336,9 +342,10 @@ def _as_arg_name_and_type(text):
None otherwise.
"""
tokens = text.split()
is_type = any(c in "[](){}" for c in text)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What was the reason for this line?

Copy link
Author

@thebadcoder96 thebadcoder96 Feb 26, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I haven't tested this, but if we add a `:` after the first word of a description line (line2 or later), `_is_arg_name()` will be called and, under our current logic, anything that is just a single word will be considered an arg. I think we want a better way to identify an arg.

That was the original reason for this line. I came up with it as a stopgap for the time being, but I think we need a better way to identify an arg name.

if len(tokens) < 2:
return None
if _is_arg_name(tokens[0]):
if is_type and _is_arg_name(tokens[0]):
type_token = ' '.join(tokens[1:])
type_token = type_token.lstrip('{([').rstrip('])}')
return tokens[0], type_token
Expand Down Expand Up @@ -392,9 +399,11 @@ def _consume_google_args_line(line_info, state):
split_line = line_info.remaining.split(':', 1)
if len(split_line) > 1:
first, second = split_line # first is either the "arg" or "arg (type)"
if _is_arg_name(first.strip()):
if _is_arg_name(first):
arg = _get_or_create_arg_by_name(state, first.strip())
arg.description.lines.append(second.strip())
arg.line1_index = line_info.index
arg.line1_length = len(line_info.line)
state.current_arg = arg
else:
arg_name_and_type = _as_arg_name_and_type(first)
Expand All @@ -403,13 +412,111 @@ def _consume_google_args_line(line_info, state):
arg = _get_or_create_arg_by_name(state, arg_name)
arg.type.lines.append(type_str)
arg.description.lines.append(second.strip())
arg.line1_index = line_info.index
arg.line1_length = len(line_info.line)
state.current_arg = arg
else:
if state.current_arg:
state.current_arg.description.lines.append(split_line[0])
state.current_arg.description.lines.append(':'.join(split_line))
_check_line2_line3(line_info, state)
else:
if state.current_arg:
state.current_arg.description.lines.append(split_line[0])
_check_line2_line3(line_info, state)


def _check_line2_line3(line_info, state):
  """Records line2/line3 measurements on the current arg.

  When the current line directly follows the arg's first line (i.e. it is
  "line2" of the description), stores the length of its first word and its
  full length on the arg, then peeks at the next line ("line3"). Otherwise the
  line2 measurements are reset to None.

  Args:
    line_info: information about the current line.
    state: The state of the docstring parser.
  """
  arg = state.current_arg

  if line_info.previous.index != arg.line1_index:
    # The current line is not line2 of this arg; clear the line2 measurements.
    arg.line2_first_word_length = None
    arg.line2_length = None
    return

  # The current line is line2: measure its first word and its raw length.
  arg.line2_first_word_length = len(line_info.line.strip().split(' ')[0])
  arg.line2_length = len(line_info.line)

  line3 = line_info.next.line
  if not line3:
    # No (or empty) next line: the line3 measurement is left untouched.
    return

  # line3 only counts as a continuation of the description when it contains a
  # colon and the text before that colon is neither "arg" nor "arg (type)".
  # NOTE(review): a line3 without any colon is therefore recorded as None,
  # exactly like a line that starts a new arg -- confirm this is intended.
  continues_description = False
  if ':' in line3:
    before_colon = line3.split(':', 1)[0]
    names_arg = _is_arg_name(before_colon)
    names_typed_arg = _as_arg_name_and_type(before_colon)
    continues_description = not (names_arg or names_typed_arg)

  if continues_description:
    arg.line3_first_word_length = len(line3.strip().split(' ')[0])
  else:
    arg.line3_first_word_length = None


def _merge_if_long_arg(state):
  """Merges the first two description lines when the arg name is long.

  The merge only happens when the arg name (rounded up) takes at least 40% of
  the rounded-up maximum line length, a line2 first word was recorded, and
  neither line1 nor line2 looks intentionally short or intentionally long.

  Args:
    state: The state of the docstring parser.
  """
  line_limit = roundup(state.max_line_length)
  arg = state.current_arg
  name_length = len(arg.name)
  overlong_threshold = 1.05 * line_limit

  if roundup(name_length) < 0.4 * line_limit:
    return  # The arg name is not long enough to justify merging.
  if not arg.line2_first_word_length:
    return  # No usable line2 was recorded for this arg.

  # A line is "intentionally short" if the next line's first word would still
  # have fit on it, and "intentionally long" if it exceeds the limit by 5%.
  line1_short = (
      roundup(arg.line1_length + arg.line2_first_word_length) < line_limit)
  line1_long = arg.line1_length >= overlong_threshold
  line2_long = arg.line2_length >= overlong_threshold

  if arg.line3_first_word_length:
    line2_short = (
        roundup(arg.line2_length + arg.line3_first_word_length) < line_limit)
  else:
    # Without a recorded line3 word there is nothing to judge line2 against.
    line2_short = False

  if not (line1_short or line1_long or line2_short or line2_long):
    _merge_line1_line2(arg.description.lines)


def _merge_line1_line2(lines):
"""Merges the first two lines of a list of strings.

Example:
_merge_line1_line2(["oh","no","bro"]) == ["oh no","bro"]

Args:
lines: a list of strings representing each line.
Returns:
same list but with the first two lines of the list now merged as a line.
"""
merged_line = lines[0] + " " + lines[1]
lines[0] = merged_line
lines.pop(1)
return lines


def roundup(number, multiple=10):
  """Rounds a number up to the next multiple of `multiple`.

  A number that is already an exact multiple is returned unchanged.

  Example:
    roundup(72) == 80
    roundup(80) == 80

  Args:
    number: an integer to round up.
    multiple: the step to round up to.
  Returns:
    For a positive `multiple`, the smallest multiple of it that is greater
    than or equal to `number`.
  Raises:
    ZeroDivisionError: if `multiple` is 0.
  """
  # (-number) % multiple is the distance from number up to the next multiple;
  # it is 0 when number is already aligned.
  return number + (-number) % multiple


def _consume_line(line_info, state):
Expand Down Expand Up @@ -465,6 +572,9 @@ def _consume_line(line_info, state):
if state.section.title == Sections.ARGS:
if state.section.format == Formats.GOOGLE:
_consume_google_args_line(line_info, state)
if state.current_arg:
if line_info.previous.index == state.current_arg.line1_index:
_merge_if_long_arg(state)
elif state.section.format == Formats.RST:
state.current_arg.description.lines.append(line_info.remaining.strip())
elif state.section.format == Formats.NUMPY:
Expand Down Expand Up @@ -511,9 +621,10 @@ def _consume_line(line_info, state):
pass


def _create_line_info(line, next_line, previous_line):
def _create_line_info(line, next_line, previous_line, index):
"""Returns information about the current line and surrounding lines."""
line_info = Namespace() # TODO(dbieber): Switch to an explicit class.
line_info.index = index
line_info.line = line
line_info.stripped = line.strip()
line_info.remaining_raw = line_info.line
Expand All @@ -523,10 +634,12 @@ def _create_line_info(line, next_line, previous_line):
line_info.next.line = next_line
next_line_exists = next_line is not None
line_info.next.stripped = next_line.strip() if next_line_exists else None
line_info.next.index = index + 1 if next_line_exists else None
line_info.next.indentation = (
len(next_line) - len(next_line.lstrip()) if next_line_exists else None)
line_info.previous.line = previous_line
previous_line_exists = previous_line is not None
line_info.previous.index = index - 1 if previous_line_exists else None
line_info.previous.indentation = (
len(previous_line) -
len(previous_line.lstrip()) if previous_line_exists else None)
Expand Down
Loading