invoice-x · eddb7 · May 2, 2019
diff --git a/src/invoice2data/extract/plugins/lines.py b/src/invoice2data/extract/plugins/lines.py
@@ -12,76 +12,95 @@
 
 def extract(self, content, output):
-    """Try to extract lines from the invoice"""
+    """Try to extract line items from the invoice.
+
+    ``self['lines']`` is a list of rule sets (a single mapping is still
+    accepted).  Each rule set is merged over ``DEFAULT_OPTIONS`` and must
+    provide ``start``, ``end`` and ``line`` regexes.  The text between the
+    ``start`` and ``end`` matches is split on ``line_separator`` and each
+    piece is tried against ``first_line``, ``last_line`` and ``line``; the
+    named groups of a match become the fields of a row.  Fields named in
+    the rule set's ``types`` are converted with ``self.coerce_type``.
+
+    Rows from all rule sets are collected into ``output['lines']``; the key
+    is left untouched when no row was found.
+    """
+    rule_sets = self['lines']
+    if isinstance(rule_sets, dict):
+        # Backward compatibility: a single rule set given as one mapping.
+        rule_sets = [rule_sets]
+
+    lines = []
+    for line in rule_sets:
+        # First apply default options.
+        plugin_settings = DEFAULT_OPTIONS.copy()
+        plugin_settings.update(line)
+        line = plugin_settings
 
-    # First apply default options.
-    plugin_settings = DEFAULT_OPTIONS.copy()
-    plugin_settings.update(self['lines'])
-    self['lines'] = plugin_settings
-
-    # Validate settings
-    assert 'start' in self['lines'], 'Lines start regex missing'
-    assert 'end' in self['lines'], 'Lines end regex missing'
-    assert 'line' in self['lines'], 'Line regex missing'
+        # Validate settings
+        assert 'start' in line, 'Lines start regex missing'
+        assert 'end' in line, 'Lines end regex missing'
+        assert 'line' in line, 'Line regex missing'
 
-    start = re.search(self['lines']['start'], content)
-    end = re.search(self['lines']['end'], content)
-    if not start or not end:
-        logger.warning('no lines found - start %s, end %s', start, end)
-        return
-    content = content[start.end(): end.start()]
-    lines = []
-    current_row = {}
-    if 'first_line' not in self['lines'] and 'last_line' not in self['lines']:
-        self['lines']['first_line'] = self['lines']['line']
-    for line in re.split(self['lines']['line_separator'], content):
-        # if the line has empty lines in it , skip them
-        if not line.strip('').strip('\n') or not line:
-            continue
-        if 'first_line' in self['lines']:
-            match = re.search(self['lines']['first_line'], line)
-            if match:
-                if 'last_line' not in self['lines']:
+        start = re.search(line['start'], content)
+        end = re.search(line['end'], content)
+        if not start or not end:
+            logger.warning('no lines found - start %s, end %s', start, end)
+            # Skip only this rule set; the remaining ones must still run.
+            continue
+        content_section = content[start.end(): end.start()]
+        # Rows appended from here on belong to the current rule set.
+        first_new_row = len(lines)
+        current_row = {}
+        if 'first_line' not in line and 'last_line' not in line:
+            line['first_line'] = line['line']
+        for line_content in re.split(line['line_separator'], content_section):
+            # if the line has empty lines in it , skip them
+            if not line_content.strip('').strip('\n') or not line_content:
+                continue
+            if 'first_line' in line:
+                match = re.search(line['first_line'], line_content)
+                if match:
+                    # A first line starts a new row; flush the pending one.
+                    if current_row:
+                        lines.append(current_row)
+                    current_row = {
+                        field: value.strip() if value else ''
+                        for field, value in match.groupdict().items()
+                    }
+                    continue
+            if 'last_line' in line:
+                match = re.search(line['last_line'], line_content)
+                if match:
+                    for field, value in match.groupdict().items():
+                        current_row[field] = '%s%s%s' % (
+                            current_row.get(field, ''),
+                            current_row.get(field, '') and '\n' or '',
+                            value.strip() if value else '',
+                        )
                     if current_row:
                         lines.append(current_row)
                     current_row = {}
-                if current_row:
-                    lines.append(current_row)
-                current_row = {
-                    field: value.strip() if value else ''
-                    for field, value in match.groupdict().items()
-                }
-                continue
-        if 'last_line' in self['lines']:
-            match = re.search(self['lines']['last_line'], line)
+                    continue
+            match = re.search(line['line'], line_content)
             if match:
                 for field, value in match.groupdict().items():
                     current_row[field] = '%s%s%s' % (
                         current_row.get(field, ''),
                         current_row.get(field, '') and '\n' or '',
                         value.strip() if value else '',
                     )
-                if current_row:
-                    lines.append(current_row)
-                current_row = {}
                 continue
-        match = re.search(self['lines']['line'], line)
-        if match:
-            for field, value in match.groupdict().items():
-                current_row[field] = '%s%s%s' % (
-                    current_row.get(field, ''),
-                    current_row.get(field, '') and '\n' or '',
-                    value.strip() if value else '',
-                )
-            continue
-        logger.debug('ignoring *%s* because it doesn\'t match anything', line)
-    if current_row:
-        lines.append(current_row)
+            logger.debug('ignoring *%s* because it doesn\'t match anything', line_content)
+        if current_row:
+            lines.append(current_row)
 
-    types = self['lines'].get('types', [])
-    for row in lines:
-        for name in row.keys():
-            if name in types:
-                row[name] = self.coerce_type(row[name], types[name])
+        # Coerce only the rows this rule set added; earlier rows were
+        # already converted with their own rule set's types.
+        types = line.get('types', {})
+        for row in lines[first_new_row:]:
+            for name in row:
+                if name in types:
+                    row[name] = self.coerce_type(row[name], types[name])
 
     if lines:
         output['lines'] = lines