-
Notifications
You must be signed in to change notification settings - Fork 4
/
parse_android.Rd
54 lines (45 loc) · 2.32 KB
/
parse_android.Rd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/parse_android.R
\name{parse_android}
\alias{parse_android}
\title{Parsing raw 'WhatsApp' chat logs according to Android text structure}
\usage{
parse_android(
chatlog,
newline_indicator = "\\n",
media_omitted = "<media omitted>",
media_indicator = "(file attached)",
sent_location = paste0("location: (?=https:\\\\/\\\\/maps\\\\.google\\\\.com\\\\/",
"\\\\?q=\\\\d\\\\d.\\\\d{6}\\\\,\\\\d\\\\.\\\\d{6})"),
live_location = "^live location shared$",
datetime_indicator = paste("(?!^)(?=((\\\\d{2}\\\\.\\\\d{2}\\\\.\\\\d{2})|(\\\\d{1,2}",
"\\\\/\\\\d{1,2}\\\\/\\\\d{2})),\\\\s\\\\d{2}\\\\:\\\\d{2}((\\\\s\\\\-)|(\\\\s(?i:(am|pm))\\\\s\\\\-)))",
sep = ""),
newline_replace = " start_newline ",
media_replace = " media_omitted ",
foursquare_loc = "^.*: https://foursquare.com/v/.*$"
)
}
\arguments{
\item{chatlog}{'WhatsApp' chat preprocessed by \code{\link[WhatsR]{parse_chat}}}
\item{newline_indicator}{character string used as the newline indicator in the chat log. Default is a Unicode newline character.}
\item{media_omitted}{character string inserted by 'WhatsApp' instead of file names when not exporting media.}
\item{media_indicator}{character string for detecting media and file attachments.}
\item{sent_location}{Regex for detecting auto generated messages for locations shared via chat.}
\item{live_location}{Regex for detecting auto generated messages for live locations shared via chat.}
\item{datetime_indicator}{Regex for detecting the DateTime indicator at the beginning of each message.}
\item{newline_replace}{replacement string for a newline character in parsed message. Default is " start_newline ".}
\item{media_replace}{replacement string for omitted media files. Default is " media_omitted ".}
\item{foursquare_loc}{Regex for detecting locations sent as 'Foursquare' links.}
}
\value{
A data frame with one row per message, containing the timestamp, the name of the sender and the message body.
}
\description{
Creates a data frame from an exported 'WhatsApp' chat log, containing one row per message
and columns for the DateTime when the message was sent, the name of the sender and the body of the message. Only works as an intermediary function
when called from within \code{\link[WhatsR]{parse_chat}}.
}
\examples{
ParsedChat <- parse_android("29.01.18, 23:33 - Alice: Hi?\n 29.01.18, 23:45 - Bob: Hi\n")
}