diff --git a/preprocessing/create_nrPDB_GO_annot.py b/preprocessing/create_nrPDB_GO_annot.py
index 0017f44..0c74efc 100644
--- a/preprocessing/create_nrPDB_GO_annot.py
+++ b/preprocessing/create_nrPDB_GO_annot.py
@@ -138,6 +138,14 @@ def write_output_files(fname, pdb2go, go2info, pdb2seq):
     onts = ['molecular_function', 'biological_process', 'cellular_component']
     selected_goterms = {ont: set() for ont in onts}
     selected_proteins = set()
+    for goterm in go2info:
+        prots = go2info[goterm]['pdb_chains']
+        num = len(prots)
+        namespace = go2info[goterm]['ont']
+        if num > 49 and num <= 5000:
+            selected_goterms[namespace].add(goterm)
+            selected_proteins = selected_proteins.union(prots)
+    """
     for chain in pdb2go:
         goterms = set(pdb2go[chain]['goterms'])
         if len(goterms) > 2 and chain in pdb2seq:
@@ -145,9 +153,10 @@ def write_output_files(fname, pdb2go, go2info, pdb2seq):
                 prots = go2info[goterm]['pdb_chains']
                 num = len(prots)
                 namespace = go2info[goterm]['ont']
-                if num > 49 and num < 5000:
+                if num > 19 and num <= 5000:
                     selected_goterms[namespace].add(goterm)
                     selected_proteins = selected_proteins.union(prots)
+    """
 
     selected_goterms_list = {ont: list(selected_goterms[ont]) for ont in onts}
     selected_gonames_list = {ont: [go2info[goterm]['goname'] for goterm in selected_goterms_list[ont]] for ont in onts}
diff --git a/preprocessing/data_collection.sh b/preprocessing/data_collection.sh
index f64a023..688afbd 100755
--- a/preprocessing/data_collection.sh
+++ b/preprocessing/data_collection.sh
@@ -17,7 +17,7 @@ printf "\n\n  DOWNLOADING PDB SEQRES SEQUENCES...\n"
 wget ftp://ftp.wwpdb.org/pub/pdb/derived_data/pdb_seqres.txt.gz -O $DATA_DIR/pdb_seqres.txt.gz
 
 printf "\n\n  DOWNLOADING PDB CLUSTERS...\n"
-wget ftp://resources.rcsb.org/sequence/clusters/bc-$SEQ_SIM.out -O $DATA_DIR/bc-$SEQ_SIM.out
+wget https://cdn.rcsb.org/resources/sequence/clusters/bc-$SEQ_SIM.out -O $DATA_DIR/bc-$SEQ_SIM.out
 
 printf "\n\n  DOWNLOADING GO HIERARCHY...\n"
 wget http://purl.obolibrary.org/obo/go/go-basic.obo -O $DATA_DIR/go-basic.obo