diff --git a/Cargo.toml b/Cargo.toml index 59043c6..ce02eed 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,7 +13,6 @@ serde = { version = "1.0.106", features = ["derive"] } serde_json = "1.0.52" # TODO: Get from crates.io once 3.0 is published clap = { git = "https://github.com/clap-rs/clap/" } -diesel = { version = "1.4", features = ["postgres"] } flate2 = "1.0.14" dotenv = "0.15.0" itertools = "0.9.0" diff --git a/Dockerfile b/Dockerfile index 2acaa02..276a806 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,14 +2,17 @@ FROM rust:1.43 as builder WORKDIR /usr/src/papergraph COPY . . -RUN cargo install diesel_cli --no-default-features --features "postgres" RUN cargo install --path . # Serving container FROM debian:buster-slim WORKDIR /usr/src/papergraph -RUN apt-get update && apt-get install -y libpq-dev wget + +RUN apt-get update && apt-get install -y wget tar +RUN wget https://github.com/dgraph-io/dgraph/releases/download/v20.03.1/dgraph-linux-amd64.tar.gz && \ + tar -C /usr/local/bin -xzf dgraph-linux-amd64.tar.gz && \ + rm dgraph-linux-amd64.tar.gz + COPY . . COPY --from=builder /usr/local/cargo/bin/papergraph /usr/local/bin/papergraph -COPY --from=builder /usr/local/cargo/bin/diesel /usr/local/bin/diesel -CMD ["papergraph"] +CMD ["papergraph"] \ No newline at end of file diff --git a/deploy/k8s/cloudsql.yaml b/deploy/k8s/cloudsql.yaml deleted file mode 100644 index 42ab973..0000000 --- a/deploy/k8s/cloudsql.yaml +++ /dev/null @@ -1,53 +0,0 @@ ---- -apiVersion: v1 -kind: Service -metadata: - name: papergraph-cloudsql - labels: - app: papergraph -spec: - ports: - - port: 5432 - selector: - app: papergraph - tier: cloudsql - clusterIP: None ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: papergraph-cloudsql - labels: - app: papergraph -spec: - replicas: 1 - selector: - matchLabels: - app: papergraph - tier: cloudsql - template: - metadata: - labels: - app: papergraph - tier: cloudsql - spec: - volumes: - - name: papergraph-credentials - secret: - secretName: papergraph-sa-cred - containers: - - name: cloudsql-proxy - image: gcr.io/cloudsql-docker/gce-proxy:1.16 - command: ["/cloud_sql_proxy", - "-instances=sideprojects-234609:us-central1:papergraph=tcp:0.0.0.0:5432", - "-credential_file=/secrets/papergraph-sa.json"] - ports: - - containerPort: 5432 - name: postgres - securityContext: - runAsUser: 2 # non-root user - allowPrivilegeEscalation: false - volumeMounts: - - name: papergraph-credentials - mountPath: /secrets - readOnly: true diff --git a/deploy/k8s/dgraph.yaml b/deploy/k8s/dgraph.yaml new file mode 100644 index 0000000..3342b01 --- /dev/null +++ b/deploy/k8s/dgraph.yaml @@ -0,0 +1,145 @@ +--- +# A small volume to store the dgraph xid mapping for data insertions +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: dgraph-xid + labels: + app: papergraph + tier: dgraph +spec: + accessModes: + - ReadWriteOnce + storageClassName: faster + resources: + requests: + storage: 16Gi +--- +# This is the service that should be used by the clients of Dgraph to talk to the cluster. +apiVersion: v1 +kind: Service +metadata: + name: papergraph-dgraph + labels: + app: papergraph + tier: dgraph +spec: + # type: LoadBalancer + ports: + - port: 5080 + targetPort: 5080 + name: zero-grpc + - port: 6080 + targetPort: 6080 + name: zero-http + - port: 8080 + targetPort: 8080 + name: alpha-http + - port: 9080 + targetPort: 9080 + name: alpha-grpc + - port: 8000 + targetPort: 8000 + name: ratel-http + selector: + app: papergraph + tier: dgraph +--- +# This StatefulSet runs 1 pod with one Zero, one Alpha & one Ratel containers. +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: papergraph-dgraph +spec: + serviceName: "papergraph-dgraph" + replicas: 1 + selector: + matchLabels: + app: papergraph + tier: dgraph + template: + metadata: + labels: + app: papergraph + tier: dgraph + spec: + tolerations: + - key: "preemptible" + operator: "Exists" + effect: "NoSchedule" + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: cloud.google.com/gke-nodepool + operator: In + values: + - preempt-1 + containers: + - name: ratel + image: dgraph/dgraph:v20.03.1 + imagePullPolicy: Always + ports: + - containerPort: 8000 + name: ratel-http + command: + - dgraph-ratel + - name: zero + image: dgraph/dgraph:v20.03.1 + imagePullPolicy: Always + ports: + - containerPort: 5080 + name: zero-grpc + - containerPort: 6080 + name: zero-http + volumeMounts: + - name: datadir + mountPath: /dgraph + command: + - bash + - "-c" + - | + set -ex + dgraph zero --my=localhost:5080 --ludicrous_mode + - name: alpha + image: dgraph/dgraph:v20.03.1 + imagePullPolicy: Always + ports: + - containerPort: 8080 + name: alpha-http + - containerPort: 9080 + name: alpha-grpc + volumeMounts: + - name: datadir + mountPath: /dgraph + env: + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + command: + - bash + - "-c" + - | + set -ex + dgraph alpha --my=localhost:7080 --lru_mb 8192 --zero localhost:5080 --ludicrous_mode + terminationGracePeriodSeconds: 60 + volumes: + - name: datadir + persistentVolumeClaim: + claimName: datadir + updateStrategy: + type: RollingUpdate + volumeClaimTemplates: + - metadata: + name: datadir + annotations: + volume.alpha.kubernetes.io/storage-class: anything + spec: + storageClassName: faster + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 128Gi diff --git a/deploy/k8s/hasura-dev.yaml b/deploy/k8s/hasura-dev.yaml deleted file mode 100644 index 4cafd12..0000000 --- a/deploy/k8s/hasura-dev.yaml +++ /dev/null @@ -1,36 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - name: papergraph-hasura-dev - labels: - app: papergraph -spec: - replicas: 1 - selector: - matchLabels: - app: papergraph - tier: hasura-dev - template: - metadata: - labels: - app: papergraph - tier: hasura-dev - spec: - containers: - - name: hasura - image: hasura/graphql-engine:v1.2.0.cli-migrations-v2 - ports: - - containerPort: 8080 - name: http - env: - - name: PGPASSWORD - valueFrom: - secretKeyRef: - name: papergraph-cloudsql - key: password - - name: HASURA_GRAPHQL_DATABASE_URL - value: postgres://postgres:$(PGPASSWORD)@papergraph-cloudsql:5432/papergraph - - name: HASURA_GRAPHQL_ENABLED_LOG_TYPES - value: "startup, http-log, webhook-log, websocket-log, query-log" - - name: HASURA_GRAPHQL_ENABLE_CONSOLE - value: "true" \ No newline at end of file diff --git a/deploy/k8s/hasura.yaml b/deploy/k8s/hasura.yaml deleted file mode 100644 index 4c833b4..0000000 --- a/deploy/k8s/hasura.yaml +++ /dev/null @@ -1,70 +0,0 @@ ---- -apiVersion: v1 -kind: Service -metadata: - name: papergraph-hasura - # annotations: - # cloud.google.com/neg: '{"ingress": true}' # Creates a NEG after an Ingress is created - labels: - app: papergraph -spec: - ports: - - port: 8080 - targetPort: 8080 - selector: - app: papergraph - tier: hasura - type: NodePort ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: papergraph-hasura - labels: - app: papergraph -spec: - replicas: 1 - selector: - matchLabels: - app: papergraph - tier: hasura - template: - metadata: - labels: - app: papergraph - tier: hasura - spec: - containers: - - name: hasura - image: hasura/graphql-engine:v1.2.0.cli-migrations-v2 - ports: - - containerPort: 8080 - name: http - livenessProbe: - httpGet: - path: /healthz - port: 8080 - failureThreshold: 1 - periodSeconds: 10 - readinessProbe: - httpGet: - path: /healthz - port: 8080 - initialDelaySeconds: 5 - periodSeconds: 5 - env: - - name: PGPASSWORD - valueFrom: - secretKeyRef: - name: papergraph-cloudsql - key: password - - name: HASURA_GRAPHQL_DATABASE_URL - value: postgres://postgres:$(PGPASSWORD)@papergraph-cloudsql:5432/papergraph - - name: HASURA_GRAPHQL_ENABLED_LOG_TYPES - value: "startup, http-log, webhook-log, websocket-log, query-log" - # Disable console in production - - name: HASURA_GRAPHQL_ENABLE_CONSOLE - value: "false" - # Disable metadata and other APIs in production - - name: HASURA_GRAPHQL_ENABLED_APIS - value: graphql \ No newline at end of file diff --git a/deploy/k8s/ingress.yaml b/deploy/k8s/ingress.yaml index 119e2c3..0adb7ba 100644 --- a/deploy/k8s/ingress.yaml +++ b/deploy/k8s/ingress.yaml @@ -11,5 +11,5 @@ spec: paths: - path: /v1/graphql backend: - serviceName: papergraph-hasura + serviceName: papergraph-dgraph servicePort: 8080 \ No newline at end of file diff --git a/deploy/k8s/postgres.yaml b/deploy/k8s/postgres.yaml deleted file mode 100644 index 59476ec..0000000 --- a/deploy/k8s/postgres.yaml +++ /dev/null @@ -1,71 +0,0 @@ ---- -apiVersion: v1 -kind: Service -metadata: - name: papergraph-postgres - labels: - app: papergraph -spec: - ports: - - port: 5432 - selector: - app: papergraph - tier: postgres - clusterIP: None ---- -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: postgres-pv-claim - labels: - app: papergraph -spec: - storageClassName: faster - accessModes: - - ReadWriteOnce - resources: - requests: - storage: 128Gi ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: papergraph-postgres - labels: - app: papergraph -spec: - replicas: 1 - selector: - matchLabels: - app: papergraph - tier: postgres - template: - metadata: - labels: - app: papergraph - tier: postgres - spec: - volumes: - - name: postgres-data - persistentVolumeClaim: - claimName: postgres-pv-claim - containers: - - name: postgres - image: postgres:12 - ports: - - containerPort: 5432 - name: postgres - volumeMounts: - - name: postgres-data - mountPath: /var/lib/postgresql/data - subPath: data - env: - - name: POSTGRES_PASSWORD - valueFrom: - secretKeyRef: - name: papergraph-cred - key: postgres-password - - name: POSTGRES_DB - value: papergraph - - name: POSTGRES_USER - value: papergraph \ No newline at end of file diff --git a/deploy/k8s/workflows/run-dgraph.yaml b/deploy/k8s/workflows/run-dgraph.yaml new file mode 100644 index 0000000..68b1ecd --- /dev/null +++ b/deploy/k8s/workflows/run-dgraph.yaml @@ -0,0 +1,37 @@ + +apiVersion: argoproj.io/v1alpha1 +kind: Workflow +metadata: + generateName: papergraph-dgraph- +spec: + entrypoint: run + arguments: + parameters: + - name: cmd + value: "echo hello world" + templates: + - name: run + tolerations: + - key: "preemptible" + operator: "Exists" + effect: "NoSchedule" + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: cloud.google.com/gke-nodepool + operator: In + values: + - preempt-1 + inputs: + parameters: + - name: cmd + script: + image: dgraph/dgraph:v20.03.1 + env: + - name: DGRAPH_URL + value: papergraph-dgraph:8080 + command: ["/bin/bash"] + source: | + {{inputs.parameters.cmd}} diff --git a/deploy/k8s/workflows/run.yaml b/deploy/k8s/workflows/run.yaml deleted file mode 100644 index 6a6355c..0000000 --- a/deploy/k8s/workflows/run.yaml +++ /dev/null @@ -1,31 +0,0 @@ - -apiVersion: argoproj.io/v1alpha1 -kind: Workflow -metadata: - generateName: papergraph-run- -spec: - entrypoint: run - arguments: - parameters: - - name: cmd - value: diesel print-schema - templates: - - name: run - inputs: - parameters: - - name: cmd - script: - image: dennybritz/papergraph:latest - env: - - name: RUST_LOG - value: info - - name: PGPASSWORD - valueFrom: - secretKeyRef: - name: papergraph-cloudsql - key: password - - name: DATABASE_URL - value: postgres://postgres:$(PGPASSWORD)@papergraph-cloudsql:5432/papergraph - command: ["/bin/bash"] - source: | - {{inputs.parameters.cmd}} diff --git a/deploy/k8s/workflows/seed-dgraph.yaml b/deploy/k8s/workflows/seed-dgraph.yaml new file mode 100644 index 0000000..75517fa --- /dev/null +++ b/deploy/k8s/workflows/seed-dgraph.yaml @@ -0,0 +1,64 @@ +apiVersion: argoproj.io/v1alpha1 +kind: Workflow +metadata: + generateName: papergraph-seed- +spec: + entrypoint: seed-all + volumes: + - name: dgraph-xid + persistentVolumeClaim: + claimName: dgraph-xid + templates: + - name: seed-all + parallelism: 1 + steps: + - - name: insert-all + template: insert + arguments: + parameters: + - name: url + value: "{{item}}" + withItems: + # - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-000.gz + - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-001.gz + - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-002.gz + - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-003.gz + - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-004.gz + - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-005.gz + - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-006.gz + - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-007.gz + - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-008.gz + - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-009.gz + - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-010.gz + - name: insert + inputs: + parameters: + - name: url + script: + image: dennybritz/papergraph:sha-f488b60 + volumeMounts: + - name: dgraph-xid + mountPath: /dgraph-xid + env: + - name: RUST_LOG + value: debug + - name: DATA_URL + value: "{{inputs.parameters.url}}" + - name: DGRAPH_HOST + value: papergraph-dgraph + command: ["/bin/bash"] + source: | + echo ${DATA_URL} + wget ${DATA_URL} + + # Convert to triples + FILENAME=$(basename ${DATA_URL}) + papergraph make-triples -d ${FILENAME} > ${FILENAME}.rdf + + # Insert into dgraph + dgraph live \ + -a ${DGRAPH_HOST}:9080 \ + -z ${DGRAPH_HOST}:5080 \ + -s papergraph.schema \ + -f ${FILENAME}.rdf \ + -x /dgraph-xid diff --git a/deploy/k8s/workflows/seed.yaml b/deploy/k8s/workflows/seed.yaml deleted file mode 100644 index c375f3e..0000000 --- a/deploy/k8s/workflows/seed.yaml +++ /dev/null @@ -1,273 +0,0 @@ -apiVersion: argoproj.io/v1alpha1 -kind: Workflow -metadata: - generateName: papergraph-seed- -spec: - entrypoint: seed-all - volumes: - - name: papergraph-credentials - secret: - secretName: papergraph-sa-cred - templates: - - name: seed-all - parallelism: 1 - steps: - - - name: drop-index - template: psql - arguments: - parameters: - - name: cmd - value: | - DROP INDEX IF EXISTS title_idx; - DROP INDEX IF EXISTS title_trgm_idx; - ALTER TABLE papers SET UNLOGGED; - ALTER TABLE authors SET UNLOGGED; - ALTER TABLE paper_authors SET UNLOGGED; - - - name: insert-all - template: insert - arguments: - parameters: - - name: url - value: "{{item}}" - withItems: - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-000.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-001.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-002.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-003.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-004.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-005.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-006.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-007.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-008.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-009.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-010.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-011.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-012.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-013.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-014.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-015.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-016.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-017.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-018.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-019.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-020.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-021.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-022.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-023.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-024.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-025.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-026.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-027.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-028.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-029.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-030.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-031.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-032.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-033.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-034.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-035.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-036.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-037.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-038.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-039.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-040.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-041.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-042.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-043.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-044.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-045.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-046.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-047.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-048.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-049.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-050.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-051.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-052.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-053.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-054.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-055.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-056.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-057.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-058.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-059.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-060.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-061.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-062.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-063.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-064.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-065.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-066.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-067.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-068.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-069.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-070.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-071.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-072.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-073.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-074.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-075.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-076.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-077.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-078.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-079.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-080.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-081.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-082.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-083.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-084.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-085.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-086.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-087.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-088.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-089.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-090.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-091.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-092.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-093.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-094.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-095.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-096.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-097.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-098.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-099.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-100.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-101.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-102.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-103.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-104.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-105.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-106.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-107.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-108.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-109.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-110.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-111.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-112.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-113.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-114.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-115.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-116.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-117.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-118.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-119.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-120.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-121.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-122.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-123.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-124.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-125.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-126.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-127.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-128.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-129.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-130.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-131.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-132.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-133.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-134.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-135.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-136.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-137.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-138.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-139.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-140.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-141.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-142.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-143.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-144.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-145.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-146.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-147.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-148.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-149.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-150.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-151.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-152.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-153.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-154.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-155.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-156.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-157.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-158.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-159.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-160.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-161.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-162.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-163.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-164.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-165.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-166.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-167.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-168.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-169.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-170.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-171.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-172.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-173.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-174.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-175.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-176.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-177.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-178.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-179.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-180.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-181.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-182.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-183.gz - - https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2020-04-10/s2-corpus-184.gz - - - - name: create-index - template: psql - arguments: - parameters: - - name: cmd - value: | - ALTER TABLE papers SET LOGGED; - ALTER TABLE authors SET LOGGED; - ALTER TABLE paper_authors SET LOGGED; - CREATE INDEX title_idx ON papers(title); - CREATE INDEX title_trgm_idx ON papers USING GIST (title gist_trgm_ops); - - name: psql - inputs: - parameters: - - name: cmd - container: - image: postgres:12 - env: - - name: PGPASSWORD - valueFrom: - secretKeyRef: - name: papergraph-cloudsql - key: password - - name: PGHOST - value: papergraph-cloudsql - - name: PGDATABASE - value: papergraph - - name: PGUSER - value: postgres - command: ["psql", "-c"] - args: - - "{{inputs.parameters.cmd}}" - - name: insert - inputs: - parameters: - - name: url - script: - image: dennybritz/papergraph:latest - env: - - name: RUST_LOG - value: debug - - name: PGPASSWORD - valueFrom: - secretKeyRef: - name: papergraph-cloudsql - key: password - - name: DATABASE_URL - value: postgres://postgres:$(PGPASSWORD)@papergraph-cloudsql:5432/papergraph - - name: DATA_URL - value: "{{inputs.parameters.url}}" - command: ["/bin/bash"] - source: | - echo ${DATA_URL} - wget ${DATA_URL} - papergraph insert -d $(basename ${DATA_URL}) diff --git a/diesel.toml b/diesel.toml deleted file mode 100644 index 71215db..0000000 --- a/diesel.toml +++ /dev/null @@ -1,5 +0,0 @@ -# For documentation on how to configure this file, -# see diesel.rs/guides/configuring-diesel-cli - -[print_schema] -file = "src/db/schema.rs" diff --git a/hasura/config.yaml b/hasura/config.yaml deleted file mode 100644 index 6ddf586..0000000 --- a/hasura/config.yaml +++ /dev/null @@ -1,6 +0,0 @@ -version: 2 -endpoint: http://localhost:8080 -metadata_directory: metadata -actions: - kind: synchronous - handler_webhook_baseurl: http://localhost:3000 diff --git a/hasura/metadata/actions.graphql b/hasura/metadata/actions.graphql deleted file mode 100644 index 139597f..0000000 --- a/hasura/metadata/actions.graphql +++ /dev/null @@ -1,2 +0,0 @@ - - diff --git a/hasura/metadata/actions.yaml b/hasura/metadata/actions.yaml deleted file mode 100644 index 1edb4c2..0000000 --- a/hasura/metadata/actions.yaml +++ /dev/null @@ -1,6 +0,0 @@ -actions: [] -custom_types: - enums: [] - input_objects: [] - objects: [] - scalars: [] diff --git a/hasura/metadata/allow_list.yaml b/hasura/metadata/allow_list.yaml deleted file mode 100644 index fe51488..0000000 --- a/hasura/metadata/allow_list.yaml +++ /dev/null @@ -1 +0,0 @@ -[] diff --git a/hasura/metadata/functions.yaml b/hasura/metadata/functions.yaml deleted file mode 100644 index fe51488..0000000 --- a/hasura/metadata/functions.yaml +++ /dev/null @@ -1 +0,0 @@ -[] diff --git a/hasura/metadata/query_collections.yaml b/hasura/metadata/query_collections.yaml deleted file mode 100644 index fe51488..0000000 --- a/hasura/metadata/query_collections.yaml +++ /dev/null @@ -1 +0,0 @@ -[] diff --git a/hasura/metadata/remote_schemas.yaml b/hasura/metadata/remote_schemas.yaml deleted file mode 100644 index fe51488..0000000 --- a/hasura/metadata/remote_schemas.yaml +++ /dev/null @@ -1 +0,0 @@ -[] diff --git a/hasura/metadata/tables.yaml b/hasura/metadata/tables.yaml deleted file mode 100644 index aa3fa03..0000000 --- a/hasura/metadata/tables.yaml +++ /dev/null @@ -1,50 +0,0 @@ -- table: - schema: public - name: authors -- table: - schema: public - name: paper_authors - object_relationships: - - name: author - using: - manual_configuration: - remote_table: - schema: public - name: authors - column_mapping: - author_id: id - - name: paper - using: - manual_configuration: - remote_table: - schema: public - name: papers - column_mapping: - paper_id: id -- table: - schema: public - name: papers - array_relationships: - - name: authors - using: - manual_configuration: - remote_table: - schema: public - name: paper_authors - column_mapping: - id: paper_id - computed_fields: - - name: cites - definition: - function: - schema: public - name: cites - table_argument: null - comment: "" - - name: num_citations - definition: - function: - schema: public - name: num_citations - table_argument: null - comment: "" diff --git a/hasura/metadata/version.yaml b/hasura/metadata/version.yaml deleted file mode 100644 index 22817d2..0000000 --- a/hasura/metadata/version.yaml +++ /dev/null @@ -1 +0,0 @@ -version: 2 diff --git a/hasura/schema.graphql b/hasura/schema.graphql deleted file mode 100644 index 6a7e1a0..0000000 --- a/hasura/schema.graphql +++ /dev/null @@ -1,1165 +0,0 @@ -schema { - query: query_root - mutation: mutation_root - subscription: subscription_root -} - -scalar _text - -# expression to compare columns of type _text. All fields are combined with logical 'AND'. -input _text_comparison_exp { - _eq: _text - _gt: _text - _gte: _text - _in: [_text!] - _is_null: Boolean - _lt: _text - _lte: _text - _neq: _text - _nin: [_text!] -} - -# columns and relationships of "authors" -type authors { - id: String! - name: String! -} - -# aggregated selection of "authors" -type authors_aggregate { - aggregate: authors_aggregate_fields - nodes: [authors!]! -} - -# aggregate fields of "authors" -type authors_aggregate_fields { - count(columns: [authors_select_column!], distinct: Boolean): Int - max: authors_max_fields - min: authors_min_fields -} - -# order by aggregate values of table "authors" -input authors_aggregate_order_by { - count: order_by - max: authors_max_order_by - min: authors_min_order_by -} - -# input type for inserting array relation for remote table "authors" -input authors_arr_rel_insert_input { - data: [authors_insert_input!]! - on_conflict: authors_on_conflict -} - -# Boolean expression to filter rows from the table "authors". All fields are combined with a logical 'AND'. -input authors_bool_exp { - _and: [authors_bool_exp] - _not: authors_bool_exp - _or: [authors_bool_exp] - id: String_comparison_exp - name: String_comparison_exp -} - -# unique or primary key constraints on table "authors" -enum authors_constraint { - # unique or primary key constraint - authors_pkey -} - -# input type for inserting data into table "authors" -input authors_insert_input { - id: String - name: String -} - -# aggregate max on columns -type authors_max_fields { - id: String - name: String -} - -# order by max() on columns of table "authors" -input authors_max_order_by { - id: order_by - name: order_by -} - -# aggregate min on columns -type authors_min_fields { - id: String - name: String -} - -# order by min() on columns of table "authors" -input authors_min_order_by { - id: order_by - name: order_by -} - -# response of any mutation on the table "authors" -type authors_mutation_response { - # number of affected rows by the mutation - affected_rows: Int! - - # data of the affected rows by the mutation - returning: [authors!]! -} - -# input type for inserting object relation for remote table "authors" -input authors_obj_rel_insert_input { - data: authors_insert_input! - on_conflict: authors_on_conflict -} - -# on conflict condition type for table "authors" -input authors_on_conflict { - constraint: authors_constraint! - update_columns: [authors_update_column!]! - where: authors_bool_exp -} - -# ordering options when selecting data from "authors" -input authors_order_by { - id: order_by - name: order_by -} - -# primary key columns input for table: "authors" -input authors_pk_columns_input { - id: String! -} - -# select columns of table "authors" -enum authors_select_column { - # column name - id - - # column name - name -} - -# input type for updating data in table "authors" -input authors_set_input { - id: String - name: String -} - -# update columns of table "authors" -enum authors_update_column { - # column name - id - - # column name - name -} - -input cites_args { - limit_: Int -} - -# mutation root -type mutation_root { - # delete data from the table: "authors" - delete_authors( - # filter the rows which have to be deleted - where: authors_bool_exp! - ): authors_mutation_response - - # delete single row from the table: "authors" - delete_authors_by_pk(id: String!): authors - - # delete data from the table: "paper_authors" - delete_paper_authors( - # filter the rows which have to be deleted - where: paper_authors_bool_exp! - ): paper_authors_mutation_response - - # delete single row from the table: "paper_authors" - delete_paper_authors_by_pk(author_id: String!, paper_id: String!): paper_authors - - # delete data from the table: "papers" - delete_papers( - # filter the rows which have to be deleted - where: papers_bool_exp! - ): papers_mutation_response - - # delete single row from the table: "papers" - delete_papers_by_pk(id: String!): papers - - # insert data into the table: "authors" - insert_authors( - # the rows to be inserted - objects: [authors_insert_input!]! - - # on conflict condition - on_conflict: authors_on_conflict - ): authors_mutation_response - - # insert a single row into the table: "authors" - insert_authors_one( - # the row to be inserted - object: authors_insert_input! - - # on conflict condition - on_conflict: authors_on_conflict - ): authors - - # insert data into the table: "paper_authors" - insert_paper_authors( - # the rows to be inserted - objects: [paper_authors_insert_input!]! - - # on conflict condition - on_conflict: paper_authors_on_conflict - ): paper_authors_mutation_response - - # insert a single row into the table: "paper_authors" - insert_paper_authors_one( - # the row to be inserted - object: paper_authors_insert_input! - - # on conflict condition - on_conflict: paper_authors_on_conflict - ): paper_authors - - # insert data into the table: "papers" - insert_papers( - # the rows to be inserted - objects: [papers_insert_input!]! - - # on conflict condition - on_conflict: papers_on_conflict - ): papers_mutation_response - - # insert a single row into the table: "papers" - insert_papers_one( - # the row to be inserted - object: papers_insert_input! - - # on conflict condition - on_conflict: papers_on_conflict - ): papers - - # update data of the table: "authors" - update_authors( - # sets the columns of the filtered rows to the given values - _set: authors_set_input - - # filter the rows which have to be updated - where: authors_bool_exp! - ): authors_mutation_response - - # update single row of the table: "authors" - update_authors_by_pk( - # sets the columns of the filtered rows to the given values - _set: authors_set_input - pk_columns: authors_pk_columns_input! - ): authors - - # update data of the table: "paper_authors" - update_paper_authors( - # sets the columns of the filtered rows to the given values - _set: paper_authors_set_input - - # filter the rows which have to be updated - where: paper_authors_bool_exp! - ): paper_authors_mutation_response - - # update single row of the table: "paper_authors" - update_paper_authors_by_pk( - # sets the columns of the filtered rows to the given values - _set: paper_authors_set_input - pk_columns: paper_authors_pk_columns_input! - ): paper_authors - - # update data of the table: "papers" - update_papers( - # increments the integer columns with given value of the filtered values - _inc: papers_inc_input - - # sets the columns of the filtered rows to the given values - _set: papers_set_input - - # filter the rows which have to be updated - where: papers_bool_exp! - ): papers_mutation_response - - # update single row of the table: "papers" - update_papers_by_pk( - # increments the integer columns with given value of the filtered values - _inc: papers_inc_input - - # sets the columns of the filtered rows to the given values - _set: papers_set_input - pk_columns: papers_pk_columns_input! - ): papers -} - -# column ordering options -enum order_by { - # in the ascending order, nulls last - asc - - # in the ascending order, nulls first - asc_nulls_first - - # in the ascending order, nulls last - asc_nulls_last - - # in the descending order, nulls first - desc - - # in the descending order, nulls first - desc_nulls_first - - # in the descending order, nulls last - desc_nulls_last -} - -# columns and relationships of "paper_authors" -type paper_authors { - # An object relationship - author: authors - author_id: String! - - # An object relationship - paper: papers - paper_id: String! -} - -# aggregated selection of "paper_authors" -type paper_authors_aggregate { - aggregate: paper_authors_aggregate_fields - nodes: [paper_authors!]! -} - -# aggregate fields of "paper_authors" -type paper_authors_aggregate_fields { - count(columns: [paper_authors_select_column!], distinct: Boolean): Int - max: paper_authors_max_fields - min: paper_authors_min_fields -} - -# order by aggregate values of table "paper_authors" -input paper_authors_aggregate_order_by { - count: order_by - max: paper_authors_max_order_by - min: paper_authors_min_order_by -} - -# input type for inserting array relation for remote table "paper_authors" -input paper_authors_arr_rel_insert_input { - data: [paper_authors_insert_input!]! - on_conflict: paper_authors_on_conflict -} - -# Boolean expression to filter rows from the table "paper_authors". All fields are combined with a logical 'AND'. -input paper_authors_bool_exp { - _and: [paper_authors_bool_exp] - _not: paper_authors_bool_exp - _or: [paper_authors_bool_exp] - author: authors_bool_exp - author_id: String_comparison_exp - paper: papers_bool_exp - paper_id: String_comparison_exp -} - -# unique or primary key constraints on table "paper_authors" -enum paper_authors_constraint { - # unique or primary key constraint - paper_authors_pkey -} - -# input type for inserting data into table "paper_authors" -input paper_authors_insert_input { - author: authors_obj_rel_insert_input - author_id: String - paper: papers_obj_rel_insert_input - paper_id: String -} - -# aggregate max on columns -type paper_authors_max_fields { - author_id: String - paper_id: String -} - -# order by max() on columns of table "paper_authors" -input paper_authors_max_order_by { - author_id: order_by - paper_id: order_by -} - -# aggregate min on columns -type paper_authors_min_fields { - author_id: String - paper_id: String -} - -# order by min() on columns of table "paper_authors" -input paper_authors_min_order_by { - author_id: order_by - paper_id: order_by -} - -# response of any mutation on the table "paper_authors" -type paper_authors_mutation_response { - # number of affected rows by the mutation - affected_rows: Int! - - # data of the affected rows by the mutation - returning: [paper_authors!]! -} - -# input type for inserting object relation for remote table "paper_authors" -input paper_authors_obj_rel_insert_input { - data: paper_authors_insert_input! - on_conflict: paper_authors_on_conflict -} - -# on conflict condition type for table "paper_authors" -input paper_authors_on_conflict { - constraint: paper_authors_constraint! - update_columns: [paper_authors_update_column!]! - where: paper_authors_bool_exp -} - -# ordering options when selecting data from "paper_authors" -input paper_authors_order_by { - author: authors_order_by - author_id: order_by - paper: papers_order_by - paper_id: order_by -} - -# primary key columns input for table: "paper_authors" -input paper_authors_pk_columns_input { - author_id: String! - paper_id: String! -} - -# select columns of table "paper_authors" -enum paper_authors_select_column { - # column name - author_id - - # column name - paper_id -} - -# input type for updating data in table "paper_authors" -input paper_authors_set_input { - author_id: String - paper_id: String -} - -# update columns of table "paper_authors" -enum paper_authors_update_column { - # column name - author_id - - # column name - paper_id -} - -# columns and relationships of "papers" -type papers { - # An array relationship - authors( - # distinct select on columns - distinct_on: [paper_authors_select_column!] - - # limit the number of rows returned - limit: Int - - # skip the first n rows. Use only with order_by - offset: Int - - # sort the rows by one or more columns - order_by: [paper_authors_order_by!] - - # filter the rows returned - where: paper_authors_bool_exp - ): [paper_authors!]! - - # An aggregated array relationship - authors_aggregate( - # distinct select on columns - distinct_on: [paper_authors_select_column!] - - # limit the number of rows returned - limit: Int - - # skip the first n rows. Use only with order_by - offset: Int - - # sort the rows by one or more columns - order_by: [paper_authors_order_by!] - - # filter the rows returned - where: paper_authors_bool_exp - ): paper_authors_aggregate! - - # A computed field, executes function "cites" - cites( - # input parameters for function "cites" - args: cites_args! - - # distinct select on columns - distinct_on: [papers_select_column!] - - # limit the number of rows returned - limit: Int - - # skip the first n rows. Use only with order_by - offset: Int - - # sort the rows by one or more columns - order_by: [papers_order_by!] - - # filter the rows returned - where: papers_bool_exp - ): [papers!] - doi: String - doi_url: String - entities: _text - fields_of_study: _text - id: String! - in_citations: _text - - # A computed field, executes function "num_citations" - num_citations: Int - out_citations: _text - paper_abstract: String - pdf_urls: _text - s2_url: String - title: String! - year: smallint -} - -# aggregated selection of "papers" -type papers_aggregate { - aggregate: papers_aggregate_fields - nodes: [papers!]! -} - -# aggregate fields of "papers" -type papers_aggregate_fields { - avg: papers_avg_fields - count(columns: [papers_select_column!], distinct: Boolean): Int - max: papers_max_fields - min: papers_min_fields - stddev: papers_stddev_fields - stddev_pop: papers_stddev_pop_fields - stddev_samp: papers_stddev_samp_fields - sum: papers_sum_fields - var_pop: papers_var_pop_fields - var_samp: papers_var_samp_fields - variance: papers_variance_fields -} - -# order by aggregate values of table "papers" -input papers_aggregate_order_by { - avg: papers_avg_order_by - count: order_by - max: papers_max_order_by - min: papers_min_order_by - stddev: papers_stddev_order_by - stddev_pop: papers_stddev_pop_order_by - stddev_samp: papers_stddev_samp_order_by - sum: papers_sum_order_by - var_pop: papers_var_pop_order_by - var_samp: papers_var_samp_order_by - variance: papers_variance_order_by -} - -# input type for inserting array relation for remote table "papers" -input papers_arr_rel_insert_input { - data: [papers_insert_input!]! - on_conflict: papers_on_conflict -} - -# aggregate avg on columns -type papers_avg_fields { - year: Float -} - -# order by avg() on columns of table "papers" -input papers_avg_order_by { - year: order_by -} - -# Boolean expression to filter rows from the table "papers". All fields are combined with a logical 'AND'. -input papers_bool_exp { - _and: [papers_bool_exp] - _not: papers_bool_exp - _or: [papers_bool_exp] - authors: paper_authors_bool_exp - doi: String_comparison_exp - doi_url: String_comparison_exp - entities: _text_comparison_exp - fields_of_study: _text_comparison_exp - id: String_comparison_exp - in_citations: _text_comparison_exp - out_citations: _text_comparison_exp - paper_abstract: String_comparison_exp - pdf_urls: _text_comparison_exp - s2_url: String_comparison_exp - title: String_comparison_exp - year: smallint_comparison_exp -} - -# unique or primary key constraints on table "papers" -enum papers_constraint { - # unique or primary key constraint - papers_pkey -} - -# input type for incrementing integer column in table "papers" -input papers_inc_input { - year: smallint -} - -# input type for inserting data into table "papers" -input papers_insert_input { - authors: paper_authors_arr_rel_insert_input - doi: String - doi_url: String - entities: _text - fields_of_study: _text - id: String - in_citations: _text - out_citations: _text - paper_abstract: String - pdf_urls: _text - s2_url: String - title: String - year: smallint -} - -# aggregate max on columns -type papers_max_fields { - doi: String - doi_url: String - id: String - paper_abstract: String - s2_url: String - title: String - year: smallint -} - -# order by max() on columns of table "papers" -input papers_max_order_by { - doi: order_by - doi_url: order_by - id: order_by - paper_abstract: order_by - s2_url: order_by - title: order_by - year: order_by -} - -# aggregate min on columns -type papers_min_fields { - doi: String - doi_url: String - id: String - paper_abstract: String - s2_url: String - title: String - year: smallint -} - -# order by min() on columns of table "papers" -input papers_min_order_by { - doi: order_by - doi_url: order_by - id: order_by - paper_abstract: order_by - s2_url: order_by - title: order_by - year: order_by -} - -# response of any mutation on the table "papers" -type papers_mutation_response { - # number of affected rows by the mutation - affected_rows: Int! - - # data of the affected rows by the mutation - returning: [papers!]! -} - -# input type for inserting object relation for remote table "papers" -input papers_obj_rel_insert_input { - data: papers_insert_input! - on_conflict: papers_on_conflict -} - -# on conflict condition type for table "papers" -input papers_on_conflict { - constraint: papers_constraint! - update_columns: [papers_update_column!]! - where: papers_bool_exp -} - -# ordering options when selecting data from "papers" -input papers_order_by { - authors_aggregate: paper_authors_aggregate_order_by - doi: order_by - doi_url: order_by - entities: order_by - fields_of_study: order_by - id: order_by - in_citations: order_by - out_citations: order_by - paper_abstract: order_by - pdf_urls: order_by - s2_url: order_by - title: order_by - year: order_by -} - -# primary key columns input for table: "papers" -input papers_pk_columns_input { - id: String! -} - -# select columns of table "papers" -enum papers_select_column { - # column name - doi - - # column name - doi_url - - # column name - entities - - # column name - fields_of_study - - # column name - id - - # column name - in_citations - - # column name - out_citations - - # column name - paper_abstract - - # column name - pdf_urls - - # column name - s2_url - - # column name - title - - # column name - year -} - -# input type for updating data in table "papers" -input papers_set_input { - doi: String - doi_url: String - entities: _text - fields_of_study: _text - id: String - in_citations: _text - out_citations: _text - paper_abstract: String - pdf_urls: _text - s2_url: String - title: String - year: smallint -} - -# aggregate stddev on columns -type papers_stddev_fields { - year: Float -} - -# order by stddev() on columns of table "papers" -input papers_stddev_order_by { - year: order_by -} - -# aggregate stddev_pop on columns -type papers_stddev_pop_fields { - year: Float -} - -# order by stddev_pop() on columns of table "papers" -input papers_stddev_pop_order_by { - year: order_by -} - -# aggregate stddev_samp on columns -type papers_stddev_samp_fields { - year: Float -} - -# order by stddev_samp() on columns of table "papers" -input papers_stddev_samp_order_by { - year: order_by -} - -# aggregate sum on columns -type papers_sum_fields { - year: smallint -} - -# order by sum() on columns of table "papers" -input papers_sum_order_by { - year: order_by -} - -# update columns of table "papers" -enum papers_update_column { - # column name - doi - - # column name - doi_url - - # column name - entities - - # column name - fields_of_study - - # column name - id - - # column name - in_citations - - # column name - out_citations - - # column name - paper_abstract - - # column name - pdf_urls - - # column name - s2_url - - # column name - title - - # column name - year -} - -# aggregate var_pop on columns -type papers_var_pop_fields { - year: Float -} - -# order by var_pop() on columns of table "papers" -input papers_var_pop_order_by { - year: order_by -} - -# aggregate var_samp on columns -type papers_var_samp_fields { - year: Float -} - -# order by var_samp() on columns of table "papers" -input papers_var_samp_order_by { - year: order_by -} - -# aggregate variance on columns -type papers_variance_fields { - year: Float -} - -# order by variance() on columns of table "papers" -input papers_variance_order_by { - year: order_by -} - -# query root -type query_root { - # fetch data from the table: "authors" - authors( - # distinct select on columns - distinct_on: [authors_select_column!] - - # limit the number of rows returned - limit: Int - - # skip the first n rows. Use only with order_by - offset: Int - - # sort the rows by one or more columns - order_by: [authors_order_by!] - - # filter the rows returned - where: authors_bool_exp - ): [authors!]! - - # fetch aggregated fields from the table: "authors" - authors_aggregate( - # distinct select on columns - distinct_on: [authors_select_column!] - - # limit the number of rows returned - limit: Int - - # skip the first n rows. Use only with order_by - offset: Int - - # sort the rows by one or more columns - order_by: [authors_order_by!] - - # filter the rows returned - where: authors_bool_exp - ): authors_aggregate! - - # fetch data from the table: "authors" using primary key columns - authors_by_pk(id: String!): authors - - # fetch data from the table: "paper_authors" - paper_authors( - # distinct select on columns - distinct_on: [paper_authors_select_column!] - - # limit the number of rows returned - limit: Int - - # skip the first n rows. Use only with order_by - offset: Int - - # sort the rows by one or more columns - order_by: [paper_authors_order_by!] - - # filter the rows returned - where: paper_authors_bool_exp - ): [paper_authors!]! - - # fetch aggregated fields from the table: "paper_authors" - paper_authors_aggregate( - # distinct select on columns - distinct_on: [paper_authors_select_column!] - - # limit the number of rows returned - limit: Int - - # skip the first n rows. Use only with order_by - offset: Int - - # sort the rows by one or more columns - order_by: [paper_authors_order_by!] - - # filter the rows returned - where: paper_authors_bool_exp - ): paper_authors_aggregate! - - # fetch data from the table: "paper_authors" using primary key columns - paper_authors_by_pk(author_id: String!, paper_id: String!): paper_authors - - # fetch data from the table: "papers" - papers( - # distinct select on columns - distinct_on: [papers_select_column!] - - # limit the number of rows returned - limit: Int - - # skip the first n rows. Use only with order_by - offset: Int - - # sort the rows by one or more columns - order_by: [papers_order_by!] - - # filter the rows returned - where: papers_bool_exp - ): [papers!]! - - # fetch aggregated fields from the table: "papers" - papers_aggregate( - # distinct select on columns - distinct_on: [papers_select_column!] - - # limit the number of rows returned - limit: Int - - # skip the first n rows. Use only with order_by - offset: Int - - # sort the rows by one or more columns - order_by: [papers_order_by!] - - # filter the rows returned - where: papers_bool_exp - ): papers_aggregate! - - # fetch data from the table: "papers" using primary key columns - papers_by_pk(id: String!): papers -} - -scalar smallint - -# expression to compare columns of type smallint. All fields are combined with logical 'AND'. -input smallint_comparison_exp { - _eq: smallint - _gt: smallint - _gte: smallint - _in: [smallint!] - _is_null: Boolean - _lt: smallint - _lte: smallint - _neq: smallint - _nin: [smallint!] -} - -# expression to compare columns of type String. All fields are combined with logical 'AND'. -input String_comparison_exp { - _eq: String - _gt: String - _gte: String - _ilike: String - _in: [String!] - _is_null: Boolean - _like: String - _lt: String - _lte: String - _neq: String - _nilike: String - _nin: [String!] - _nlike: String - _nsimilar: String - _similar: String -} - -# subscription root -type subscription_root { - # fetch data from the table: "authors" - authors( - # distinct select on columns - distinct_on: [authors_select_column!] - - # limit the number of rows returned - limit: Int - - # skip the first n rows. Use only with order_by - offset: Int - - # sort the rows by one or more columns - order_by: [authors_order_by!] - - # filter the rows returned - where: authors_bool_exp - ): [authors!]! - - # fetch aggregated fields from the table: "authors" - authors_aggregate( - # distinct select on columns - distinct_on: [authors_select_column!] - - # limit the number of rows returned - limit: Int - - # skip the first n rows. Use only with order_by - offset: Int - - # sort the rows by one or more columns - order_by: [authors_order_by!] - - # filter the rows returned - where: authors_bool_exp - ): authors_aggregate! - - # fetch data from the table: "authors" using primary key columns - authors_by_pk(id: String!): authors - - # fetch data from the table: "paper_authors" - paper_authors( - # distinct select on columns - distinct_on: [paper_authors_select_column!] - - # limit the number of rows returned - limit: Int - - # skip the first n rows. Use only with order_by - offset: Int - - # sort the rows by one or more columns - order_by: [paper_authors_order_by!] - - # filter the rows returned - where: paper_authors_bool_exp - ): [paper_authors!]! - - # fetch aggregated fields from the table: "paper_authors" - paper_authors_aggregate( - # distinct select on columns - distinct_on: [paper_authors_select_column!] - - # limit the number of rows returned - limit: Int - - # skip the first n rows. Use only with order_by - offset: Int - - # sort the rows by one or more columns - order_by: [paper_authors_order_by!] - - # filter the rows returned - where: paper_authors_bool_exp - ): paper_authors_aggregate! - - # fetch data from the table: "paper_authors" using primary key columns - paper_authors_by_pk(author_id: String!, paper_id: String!): paper_authors - - # fetch data from the table: "papers" - papers( - # distinct select on columns - distinct_on: [papers_select_column!] - - # limit the number of rows returned - limit: Int - - # skip the first n rows. Use only with order_by - offset: Int - - # sort the rows by one or more columns - order_by: [papers_order_by!] - - # filter the rows returned - where: papers_bool_exp - ): [papers!]! - - # fetch aggregated fields from the table: "papers" - papers_aggregate( - # distinct select on columns - distinct_on: [papers_select_column!] - - # limit the number of rows returned - limit: Int - - # skip the first n rows. Use only with order_by - offset: Int - - # sort the rows by one or more columns - order_by: [papers_order_by!] - - # filter the rows returned - where: papers_bool_exp - ): papers_aggregate! - - # fetch data from the table: "papers" using primary key columns - papers_by_pk(id: String!): papers -} - diff --git a/migrations/00000000000000_diesel_initial_setup/down.sql b/migrations/00000000000000_diesel_initial_setup/down.sql deleted file mode 100644 index 73d8541..0000000 --- a/migrations/00000000000000_diesel_initial_setup/down.sql +++ /dev/null @@ -1,7 +0,0 @@ --- This file was automatically created by Diesel to setup helper functions --- and other internal bookkeeping. This file is safe to edit, any future --- changes will be added to existing projects as new migrations. - -DROP FUNCTION IF EXISTS diesel_manage_updated_at(_tbl regclass); -DROP FUNCTION IF EXISTS diesel_set_updated_at(); -DROP EXTENSION pg_trgm; \ No newline at end of file diff --git a/migrations/00000000000000_diesel_initial_setup/up.sql b/migrations/00000000000000_diesel_initial_setup/up.sql deleted file mode 100644 index 3771d5e..0000000 --- a/migrations/00000000000000_diesel_initial_setup/up.sql +++ /dev/null @@ -1,38 +0,0 @@ --- This file was automatically created by Diesel to setup helper functions --- and other internal bookkeeping. This file is safe to edit, any future --- changes will be added to existing projects as new migrations. - - - - --- Sets up a trigger for the given table to automatically set a column called --- `updated_at` whenever the row is modified (unless `updated_at` was included --- in the modified columns) --- --- # Example --- --- ```sql --- CREATE TABLE users (id SERIAL PRIMARY KEY, updated_at TIMESTAMP NOT NULL DEFAULT NOW()); --- --- SELECT diesel_manage_updated_at('users'); --- ``` -CREATE OR REPLACE FUNCTION diesel_manage_updated_at(_tbl regclass) RETURNS VOID AS $$ -BEGIN - EXECUTE format('CREATE TRIGGER set_updated_at BEFORE UPDATE ON %s - FOR EACH ROW EXECUTE PROCEDURE diesel_set_updated_at()', _tbl); -END; -$$ LANGUAGE plpgsql; - -CREATE OR REPLACE FUNCTION diesel_set_updated_at() RETURNS trigger AS $$ -BEGIN - IF ( - NEW IS DISTINCT FROM OLD AND - NEW.updated_at IS NOT DISTINCT FROM OLD.updated_at - ) THEN - NEW.updated_at := current_timestamp; - END IF; - RETURN NEW; -END; -$$ LANGUAGE plpgsql; - -CREATE EXTENSION pg_trgm; \ No newline at end of file diff --git a/migrations/2020-04-29-090350_create_papers/down.sql b/migrations/2020-04-29-090350_create_papers/down.sql deleted file mode 100644 index 37c67c7..0000000 --- a/migrations/2020-04-29-090350_create_papers/down.sql +++ /dev/null @@ -1 +0,0 @@ -DROP TABLE papers; \ No newline at end of file diff --git a/migrations/2020-04-29-090350_create_papers/up.sql b/migrations/2020-04-29-090350_create_papers/up.sql deleted file mode 100644 index 6025720..0000000 --- a/migrations/2020-04-29-090350_create_papers/up.sql +++ /dev/null @@ -1,17 +0,0 @@ -CREATE TABLE papers ( - id VARCHAR PRIMARY KEY, - title TEXT NOT NULL, - year SMALLINT, - paper_abstract TEXT, - entities TEXT[], - fields_of_study TEXT[], - pdf_urls TEXT[], - doi TEXT, - doi_url TEXT, - s2_url TEXT, - in_citations TEXT[], - out_citations TEXT[] -); - -CREATE INDEX title_trgm_idx ON papers USING GIST (title gist_trgm_ops); -CREATE INDEX title_idx ON papers(title); diff --git a/migrations/2020-04-29-111644_create_authors/down.sql b/migrations/2020-04-29-111644_create_authors/down.sql deleted file mode 100644 index 38bcaf0..0000000 --- a/migrations/2020-04-29-111644_create_authors/down.sql +++ /dev/null @@ -1,2 +0,0 @@ -DROP TABLE authors; -DROP TABLE paper_authors; diff --git a/migrations/2020-04-29-111644_create_authors/up.sql b/migrations/2020-04-29-111644_create_authors/up.sql deleted file mode 100644 index d7a5065..0000000 --- a/migrations/2020-04-29-111644_create_authors/up.sql +++ /dev/null @@ -1,13 +0,0 @@ -CREATE TABLE authors ( - id VARCHAR PRIMARY KEY, - name VARCHAR NOT NULL -); - -CREATE TABLE paper_authors ( - author_id VARCHAR NOT NULL, - paper_id VARCHAR NOT NULL, - PRIMARY KEY(author_id, paper_id) -); - -CREATE INDEX paper_authors_author_id_idx ON paper_authors (author_id); -CREATE INDEX paper_authors_paper_id_idx ON paper_authors (paper_id); \ No newline at end of file diff --git a/migrations/2020-05-01-214409_create_functions/down.sql b/migrations/2020-05-01-214409_create_functions/down.sql deleted file mode 100644 index 71cada2..0000000 --- a/migrations/2020-05-01-214409_create_functions/down.sql +++ /dev/null @@ -1,3 +0,0 @@ -DROP FUNCTION cites; -DROP FUNCTION num_citations; - diff --git a/migrations/2020-05-01-214409_create_functions/up.sql b/migrations/2020-05-01-214409_create_functions/up.sql deleted file mode 100644 index ab05d29..0000000 --- a/migrations/2020-05-01-214409_create_functions/up.sql +++ /dev/null @@ -1,14 +0,0 @@ -CREATE FUNCTION cites(paper_row papers, limit_ integer) -RETURNS SETOF papers AS $$ - SELECT p2.* FROM papers p1 - JOIN papers p2 ON p2.id=ANY(p1.out_citations) - WHERE p1.id = paper_row.id AND p2.id != paper_row.id - LIMIT limit_ -$$ LANGUAGE SQL STABLE; - - -CREATE FUNCTION num_citations(paper_row papers) -RETURNS integer AS $$ - SELECT array_length(in_citations, 1) FROM papers - WHERE id = paper_row.id -$$ LANGUAGE SQL STABLE; \ No newline at end of file diff --git a/migrations/.gitkeep b/papergraph.graphql similarity index 100% rename from migrations/.gitkeep rename to papergraph.graphql diff --git a/papergraph.schema b/papergraph.schema new file mode 100644 index 0000000..053852b --- /dev/null +++ b/papergraph.schema @@ -0,0 +1,37 @@ +type Paper { + paper_id + title + abstract + year + s2_url + doi + doi_url + pdf_urls + fields_of_study + entities + + authors + cites +} + +: string @index(exact) . +: string @index(term) . +<abstract>: string . +<year>: int @index(int) . +<s2_url>: string . +<doi>: string . +<doi_url>: string . +<pdf_urls>: [string] . +<fields_of_study>: [string] . +<entities>: [string] . + +<authors>: [uid] @reverse . +<cites>: [uid] @count @reverse . + +type Author { + author_id + name +} + +<author_id>: string @index(exact) . +<name>: string @index(term) . \ No newline at end of file diff --git a/src/bin/papergraph.rs b/src/bin/papergraph.rs index e57e9a5..00c0f4b 100644 --- a/src/bin/papergraph.rs +++ b/src/bin/papergraph.rs @@ -6,7 +6,8 @@ use papergraph::io::Paper; use serde_json; use std::env; use std::fs::File; -use std::io::{self, BufRead}; +use std::io::{self, BufRead, Write}; +use std::io::BufWriter; #[derive(Clap)] #[clap(version = "0.1.0", author = "Denny Britz <dennybritz@gmail.com>")] @@ -18,13 +19,13 @@ struct Opts { #[derive(Clap)] enum SubCommand { /// Insert records into the database - #[clap(name = "insert", version = "0.1")] - Insert(Insert), + #[clap(name = "make-triples", version = "0.1")] + MakeTriples(MakeTriples), } -/// Insert records into the database +/// Generate RDF Triples #[derive(Clap, Debug)] -struct Insert { +struct MakeTriples { /// Read JSON records from this path #[clap(short = "d", long = "data")] data: String, @@ -49,11 +50,7 @@ struct Insert { field_of_study: Vec<String>, } -fn insert(opts: Insert) { - log::info!("establishing db connection"); - let database_url = env::var("DATABASE_URL").expect("DATABASE_URL must be set"); - let conn = papergraph::db::utils::establish_connection(&database_url); - +fn make_triples(opts: MakeTriples) { log::info!("reading records from {}", &opts.data); let file = File::open(&opts.data).expect("failed to open data file"); let reader: Box<dyn BufRead> = if opts.data.ends_with(".gz") { @@ -73,14 +70,16 @@ fn insert(opts: Insert) { .iter() .any(|fos| p.fields_of_study.contains(fos)) }); - - for chunk in &records.chunks(8192) { - let mut batch = papergraph::db::utils::RecordBatch::new(); - let papers: Vec<Paper> = chunk.collect(); - for paper in papers.iter() { - batch.append(&mut papergraph::db::utils::s2_record_to_batch(paper)); + + let mut out_writer = BufWriter::new(std::io::stdout()); + for record in records { + let triples = papergraph::dgraph::s2_record_to_rdf_triples(&record); + for triple in triples.iter() { + match write!(out_writer, "{}\n", &triple) { + Ok(_) => {}, + Err(e) => log::error!("{}", e) + } } - batch.insert(&conn).expect("database insert failed"); } } @@ -90,6 +89,6 @@ fn main() { let opts: Opts = Opts::parse(); match opts.cmd { - SubCommand::Insert(opts) => insert(opts), + SubCommand::MakeTriples(opts) => make_triples(opts), } } diff --git a/src/db/mod.rs b/src/db/mod.rs deleted file mode 100644 index 52f1d6b..0000000 --- a/src/db/mod.rs +++ /dev/null @@ -1,3 +0,0 @@ -pub mod models; -pub mod schema; -pub mod utils; diff --git a/src/db/models.rs b/src/db/models.rs deleted file mode 100644 index bb55d94..0000000 --- a/src/db/models.rs +++ /dev/null @@ -1,33 +0,0 @@ -use super::schema::{authors, paper_authors, papers}; -use diesel::{Identifiable, Insertable, Queryable}; - -#[derive(Debug, Identifiable, Insertable, Queryable, AsChangeset)] -#[table_name = "papers"] -pub struct Paper<'a> { - pub id: &'a str, - pub title: &'a str, - pub year: Option<i16>, - pub paper_abstract: &'a str, - pub fields_of_study: &'a Vec<String>, - pub entities: &'a Vec<String>, - pub pdf_urls: &'a Vec<String>, - pub s2_url: &'a str, - pub doi: &'a str, - pub doi_url: &'a str, - pub in_citations: &'a Vec<String>, - pub out_citations: &'a Vec<String>, -} - -#[derive(Debug, Identifiable, Insertable, Queryable, AsChangeset)] -#[table_name = "authors"] -pub struct Author<'a> { - pub id: &'a str, - pub name: &'a str, -} - -#[derive(Debug, Insertable, Queryable, AsChangeset)] -#[table_name = "paper_authors"] -pub struct PaperAuthor<'a> { - pub author_id: &'a str, - pub paper_id: &'a str, -} diff --git a/src/db/schema.rs b/src/db/schema.rs deleted file mode 100644 index dc93155..0000000 --- a/src/db/schema.rs +++ /dev/null @@ -1,36 +0,0 @@ -table! { - authors (id) { - id -> Varchar, - name -> Varchar, - } -} - -table! { - paper_authors (author_id, paper_id) { - author_id -> Varchar, - paper_id -> Varchar, - } -} - -table! { - papers (id) { - id -> Varchar, - title -> Text, - year -> Nullable<Int2>, - paper_abstract -> Nullable<Text>, - entities -> Nullable<Array<Text>>, - fields_of_study -> Nullable<Array<Text>>, - pdf_urls -> Nullable<Array<Text>>, - doi -> Nullable<Text>, - doi_url -> Nullable<Text>, - s2_url -> Nullable<Text>, - in_citations -> Nullable<Array<Text>>, - out_citations -> Nullable<Array<Text>>, - } -} - -allow_tables_to_appear_in_same_query!( - authors, - paper_authors, - papers, -); diff --git a/src/db/utils.rs b/src/db/utils.rs deleted file mode 100644 index c80f490..0000000 --- a/src/db/utils.rs +++ /dev/null @@ -1,113 +0,0 @@ -use diesel::pg::PgConnection; -use diesel::prelude::*; -use diesel::pg::upsert::excluded; -use itertools::Itertools; - -pub use super::{models, schema}; - -pub fn establish_connection(database_url: &str) -> PgConnection { - PgConnection::establish(database_url).expect(&format!("Error connecting to {}", database_url)) -} - -/// Each json record from Semantic Scholar maps to multiple database rows -/// which are accumulated in a `RecordBatch` -pub struct RecordBatch<'a> { - pub papers: Vec<models::Paper<'a>>, - pub authors: Vec<models::Author<'a>>, - pub paper_authors: Vec<models::PaperAuthor<'a>>, -} - -impl<'a> RecordBatch<'a> { - /// Creates a new empty RecordBatch - pub fn new() -> Self { - RecordBatch { - papers: vec![], - authors: vec![], - paper_authors: vec![], - } - } - - /// Inserts this RecordBatch into the database - pub fn insert(&self, conn: &PgConnection) -> Result<(), diesel::result::Error> { - let chunk_size = 4096; - - log::info!("inserting {} papers", &self.papers.len()); - for papers in &self.papers.iter().chunks(chunk_size) { - let papers: Vec<&models::Paper<'a>> = papers.collect(); - diesel::insert_into(schema::papers::table) - .values(papers) - .on_conflict(schema::papers::id) - .do_update() - .set(schema::papers::id.eq(excluded(schema::papers::id))) - .execute(conn)?; - } - - log::info!("inserting {} authors", &self.authors.len()); - for chunk in &self.authors.iter().chunks(chunk_size) { - let chunk: Vec<&models::Author<'a>> = chunk.collect(); - diesel::insert_into(schema::authors::table) - .values(chunk) - .on_conflict_do_nothing() - .execute(conn)?; - } - - for chunk in &self.paper_authors.iter().chunks(chunk_size) { - let chunk: Vec<&models::PaperAuthor<'a>> = chunk.collect(); - diesel::insert_into(schema::paper_authors::table) - .values(chunk) - .on_conflict_do_nothing() - .execute(conn)?; - } - - return Ok(()); - } - - /// Moves all elements of other into this RecordBatch - pub fn append(&mut self, other: &mut RecordBatch<'a>) { - self.papers.append(&mut other.papers); - self.authors.append(&mut other.authors); - self.paper_authors.append(&mut other.paper_authors); - } -} - -pub fn s2_record_to_batch<'a>(record: &'a crate::io::Paper) -> RecordBatch<'a> { - let paper = models::Paper { - id: &record.id, - title: &record.title, - year: record.year.map(|y| y as i16), - paper_abstract: &record.paper_abstract, - fields_of_study: &record.fields_of_study, - entities: &record.entities, - pdf_urls: &record.pdf_urls, - s2_url: &record.s2_url, - doi: &record.doi, - doi_url: &record.doi_url, - in_citations: &record.in_citations, - out_citations: &record.out_citations, - }; - - // TODO: Is it correct to filter out authors without ID!? - let authors: Vec<models::Author> = record - .authors - .iter() - .filter(|a| a.ids.len() > 0) - .map(|a| models::Author { - id: a.ids.get(0).unwrap(), - name: &a.name, - }) - .collect(); - - let paper_authors: Vec<models::PaperAuthor> = authors - .iter() - .map(|a| models::PaperAuthor { - paper_id: &record.id, - author_id: &a.id, - }) - .collect(); - - RecordBatch { - papers: vec![paper], - authors, - paper_authors, - } -} diff --git a/src/dgraph.rs b/src/dgraph.rs new file mode 100644 index 0000000..a515bd1 --- /dev/null +++ b/src/dgraph.rs @@ -0,0 +1,101 @@ +enum RDFObject<'a> { + Literal(&'a str), + UID(&'a str), +} + +use RDFObject::Literal; +use RDFObject::UID; + +impl<'a> RDFObject<'a> { + pub fn string(&self) -> String { + match self { + RDFObject::Literal(s) => { + let escaped = s + .replace("\\", "\\\\") + .replace(r###"""###, r###"\""###) + .replace("\n", "\\n") + .replace("\r", "\\r"); + format!("\"{}\"", escaped) + } + RDFObject::UID(s) => format!("{}", s), + } + } +} + +struct Triple<'a> { + pub subject: RDFObject<'a>, + pub predicate: &'a str, + pub object: RDFObject<'a>, +} + +impl<'a> Triple<'a> { + pub fn new(subject: RDFObject<'a>, predicate: &'a str, object: RDFObject<'a>) -> Self { + Triple { + subject, + predicate, + object, + } + } + + pub fn str(&self) -> String { + format!( + "{} {} {} .", + &self.subject.string(), + self.predicate, + self.object.string() + ) + } +} + +pub fn s2_record_to_rdf_triples<'a>(record: &'a crate::io::Paper) -> Vec<String> { + let mut res = vec![]; + let id = &record.id; + let blank = format!("_:{}", id); + let blank: &str = blank.as_ref(); + + res.push(Triple::new(UID(blank), "<dgraph.type>", Literal("Paper")).str()); + res.push(Triple::new(UID(blank), "<paper_id>", Literal(&record.id)).str()); + res.push(Triple::new(UID(blank), "<title>", Literal(&record.title)).str()); + res.push(Triple::new(UID(blank), "<abstract>", Literal(&record.paper_abstract)).str()); + if let Some(year) = record.year { + let year = format!("{}", year); + res.push(Triple::new(UID(blank), "<year>", Literal(&year)).str()); + } + res.push(Triple::new(UID(blank), "<s2_url>", Literal(&record.s2_url)).str()); + res.push(Triple::new(UID(blank), "<doi>", Literal(&record.doi)).str()); + res.push(Triple::new(UID(blank), "<doi_url>", Literal(&record.doi_url)).str()); + + for pdf_url in record.pdf_urls.iter() { + res.push(Triple::new(UID(blank), "<pdf_urls>", Literal(pdf_url)).str()); + } + for fos in record.fields_of_study.iter() { + res.push(Triple::new(UID(blank), "<fields_of_study>", Literal(fos)).str()); + } + for entity in record.entities.iter() { + res.push(Triple::new(UID(blank), "<entities>", Literal(entity)).str()); + } + + for author in record.authors.iter() { + for id in author.ids.first() { + let author_blank = format!("_:{}", id); + res.push(Triple::new(UID(&author_blank), "<dgraph.type>", Literal("Author")).str()); + res.push(Triple::new(UID(&author_blank), "<name>", Literal(&author.name)).str()); + res.push(Triple::new(UID(&author_blank), "<author_id>", Literal(id)).str()); + res.push(Triple::new(UID(blank), "<authors>", UID(&author_blank)).str()); + } + } + + for citation in record.out_citations.iter() { + let cit_blank = format!("_:{}", citation); + res.push(Triple::new(UID(&cit_blank), "<dgraph.type>", Literal("Paper")).str()); + res.push(Triple::new(UID(blank), "<cites>", UID(&cit_blank)).str()); + } + + for citation in record.in_citations.iter() { + let cit_blank = format!("_:{}", citation); + res.push(Triple::new(UID(&cit_blank), "<dgraph.type>", Literal("Paper")).str()); + res.push(Triple::new(UID(&cit_blank), "<cites>", UID(blank)).str()); + } + + res +} diff --git a/src/lib.rs b/src/lib.rs index 50c038d..2404bc5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,5 +1,2 @@ -#[macro_use] -extern crate diesel; - -pub mod db; +pub mod dgraph; pub mod io;