diff --git a/services/lead_generation/main.py b/services/lead_generation/main.py
new file mode 100644
index 0000000..9378bb7
--- /dev/null
+++ b/services/lead_generation/main.py
@@ -0,0 +1,55 @@
+"""Scrapy spider that collects lead names and e-mail addresses."""
+
+import scrapy
+import pandas as pd  # NOTE(review): pd is not referenced in this module
+
+
+class LeadSpider(scrapy.Spider):
+    """Yield one record per ``div.lead`` element of each crawled response."""
+
+    name = "leads"
+    start_urls = [
+        'https://example.com/leads'
+    ]
+
+    @staticmethod
+    def _clean(text):
+        """Return *text* without surrounding whitespace, or None if empty."""
+        if text is None:
+            return None
+        return text.strip() or None
+
+    def parse(self, response):
+        """Extract leads from *response*.
+
+        Yields:
+            dict: ``'name'`` and ``'email'`` (each ``str`` or ``None``) for
+            every ``div.lead`` element that has at least one of the two.
+        """
+        for lead in response.css('div.lead'):
+            lead_name = self._clean(lead.css('span.name::text').get())
+            lead_email = self._clean(lead.css('span.email::text').get())
+            # An element with neither field carries no usable lead data.
+            if lead_name is None and lead_email is None:
+                continue
+            yield {
+                'name': lead_name,
+                'email': lead_email,
+            }
+
+
+def main():
+    """Run LeadSpider in a standalone Scrapy crawler process."""
+    # Imported locally, as in the original script: only the entry point
+    # needs it.
+    from scrapy.crawler import CrawlerProcess
+
+    # NOTE(review): no settings are passed, so no item export appears to be
+    # configured here -- confirm where the scraped leads are meant to go.
+    process = CrawlerProcess()
+    process.crawl(LeadSpider)
+    process.start()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/services/lead_generation/requirements.txt b/services/lead_generation/requirements.txt
new file mode 100644
index 0000000..f040444
--- /dev/null
+++ b/services/lead_generation/requirements.txt
@@ -0,0 +1,2 @@
+scrapy
+pandas