-
Notifications
You must be signed in to change notification settings - Fork 0
/
pre_process_multi_threaded.py
executable file
·1031 lines (926 loc) · 34.3 KB
/
pre_process_multi_threaded.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# Medical abbreviation -> expansion table used by files() to expand
# abbreviations before tokenisation.
#
# Lookup is an exact, case-sensitive match on a single whitespace-separated
# word, which is why several abbreviations appear in more than one casing
# (e.g. 'NG' / 'ng').  files() replaces every non-letter character with a
# space before the lookup, so keys containing digits, punctuation or spaces
# (e.g. 'b.i.d.', 'CO2', 'mm Hg') are currently never matched; they are kept
# for completeness.
abb = {
    'A2': 'aortic second sound',
    'AAA': 'abdominal aortic aneurysm',
    'abd': 'abdominal',
    'ABG': 'arterial blood gas',
    'ABI': 'ankle-brachial index',
    'ABO': 'three basic blood groups',
    'AC': 'adrenal cortex',
    'a.c.': 'before a meal',
    'ac': 'before a meal',
    'acc.': 'accommodation',
    'A/CA': 'convergence accommodation ratio',
    'ACE': 'angiotensin-converting enzyme',
    'ACh': 'acetylcholine',
    'AChE': 'acetylcholinesterase',
    'AChR': 'acetylcholine receptor',
    'ACLS': 'advanced cardiac life support',
    'ACTH': 'adrenocorticotropic hormone',
    'AD': 'advance directive',
    'ad': 'to',
    'ADH': 'antidiuretic hormone',
    'ADHD': 'attention deficit-hyperactivity disorder',
    'ADL': 'activities of daily living',
    'ADLs': 'activities of daily living',
    'ad lib.': 'freely',
    'admov.': 'apply',
    'ad sat.': 'to saturation',
    'AED': 'antiepileptic drug',
    'AF': 'atrial fibrillation',
    'AFB': 'acid-fast bacillus',
    'AFP': 'alpha-fetoprotein',
    'A/G': 'albumin/globulin ratio',
    'A-G ratio': 'albumin/globulin ratio',
    'Ag': 'silver',
    'AGC': 'atypical glandular cells',
    'AgNO3': 'silver nitrate',
    'ah': 'hypermetropic astigmatism',
    'AHF': 'antihemophilic factor',
    'AI': 'aortic incompetence',
    'AICD': 'automatic implantable cardiac defibrillator',
    'AIDS': 'acquired immunodeficiency syndrome',
    'AK': 'above the knee',
    'Al': 'aluminum',
    'Alb': 'albumin',
    'ALP': 'alkaline phosphatase',
    'ALS': 'amyotrophic lateral sclerosis',
    'ALT': 'alanine aminotransferase',
    'alt. dieb.': 'every other day',
    'alt. hor.': 'every other hour',
    'alt. noc.': 'every other night',
    'a.m.a.': 'against medical advice',
    'AMI': 'acute myocardial infarction',
    'AML': 'acute myelogenous leukemia',
    'AMLS': 'Advanced Medical Life Support',
    'amp': 'ampule',
    'ANA': 'antinuclear antibody',
    'anat': 'anatomy',
    'ANNA': 'anti-neuronal nuclear antibody',
    'ANP': 'atrial natriuretic peptide',
    'ant.': 'anterior',
    'anti-CCP': 'anticyclic citrullinated peptide',
    'Ao.': 'aorta',
    'A-P': 'anterior-posterior',
    'A&P': 'auscultation and percussion',
    'ap': 'before dinner',
    'APAP': 'acetaminophen',
    'aPTT': 'activated partial thromboplastin',
    'AQ': 'water',
    'aq': 'water',
    'aq. dest.': 'distilled water',
    'aq. frig.': 'cold water',
    'ARC': 'AIDS related complex',
    'ARDS': 'acute respiratory distress syndrome',
    'ARMD': 'age related macular degeneration',
    'ASA': 'acetylsalicylic acid',
    'ASC': 'atypical squamous cells',
    'asc.': 'ascending',
    'ASCA': 'anti Saccharomyces cerevisiae antibody',
    'ASC-US': 'atypical squamous cells of undetermined significance',
    'ASCVD': 'atherosclerotic cardiovascular disease',
    'ASD': 'atrial septal defect',
    'AsH': 'hypermetropic astigmatism',
    'AsM': 'myopic astigmatism',
    'AST': 'aspartate aminotransferase',
    'Ast': 'astigmatism',
    'ATCC': 'American Type Culture Collection',
    'at. wt.': 'atomic weight',
    'Au': 'gold',
    'A-V': 'atrioventricular',
    'AV': 'atrioventricular',
    'A/V': 'atrioventricular',
    'av.': 'avoirdupois',
    'AVM': 'arteriovenous malformation',
    'AVP': 'arginine vasopressin',
    'Ba': 'barium',
    'BAC': 'blood alcohol concentration',
    'BBB': 'blood-brain barrier',
    'BBT': 'basal body temperature',
    'BCG': 'bacille Calmette-Guérin',
    'BCLS': 'basic cardiac life support',
    'BCP': 'birth control pills',
    'BD': 'Buerger disease',
    'BHS': 'beta-hemolytic streptococci',
    'Bi': 'bismuth',
    'bib.': 'drink',
    'bid': 'twice a day',
    'b.i.d.': 'twice a day',
    'b.i.n.': 'twice a night',
    'bipap': 'bilevel positive airway pressure',
    'BK': 'below the knee',
    'BLS': 'basic life support',
    'BM': 'bowel movement',
    'BMI': 'body mass index',
    'BMR': 'basal metabolic rate',
    'BMS': 'bone marrow suppression',
    'BMT': 'bone marrow transplantation',
    'BNP': 'brain natriuretic peptide',
    'bol.': 'pill',
    'BP': 'blood pressure',
    'BPH': 'benign prostatic hyperplasia',
    'bpm': 'beats per minute',
    'BRM': 'biologic response modifier',
    'BROW': 'barley, rye, oats, and wheat',
    'BSA': 'body surface area',
    'BSE': 'breast self-examination',
    'BUN': 'blood urea nitrogen',
    'BW': 'birth weight',
    'Bx': 'biopsy',
    'CA': 'coronary artery',
    'ca.': 'about',
    'CABG': 'coronary artery bypass graft',
    'CaCO3': 'calcium carbonate',
    'CAD': 'coronary artery disease',
    'CAH': 'chronic active hepatitis',
    'Cal': 'large calorie',
    'CAP': 'let take',
    'cap.': 'capsule',
    'C&S': 'culture and sensitivity',
    'cath': 'catheter',
    'CBC': 'complete blood count',
    'CBI': 'continuous bladder irrigation',
    'CBT': 'cognitive behavioral therapy',
    'CCl4': 'carbon tetrachloride',
    'CCU': 'critical care unit',
    'CD4': 'T-helper cells',
    'CD8': 'cytotoxic cells',
    'CDC': 'Centers for Disease Control and Prevention',
    'CEA': 'carcinoembryonic antigen',
    'CF': 'cystic fibrosis',
    'CFTR': 'cystic fibrosis transmembrane regulator',
    'cg': 'centigram',
    'CHD': 'congenital heart disease',
    'ChE': 'cholinesterase',
    'CHF': 'congestive heart failure',
    'CI': 'cardiac index',
    'Ci': 'curie',
    'CIN': 'cervical intraepithelial neoplasia',
    'CIS': 'carcinoma in situ',
    'CK': 'creatine kinase',
    'CK-MB': 'serum creatine kinase',
    'Cl': 'chlorine',
    'CLL': 'chronic lymphocytic leukemia',
    'cm': 'centimeter',
    'c.m.s.': 'to be taken tomorrow morning',
    'CMT': 'certified medication technician',
    'CMV': 'cytomegalovirus',
    'c.n.': 'tomorrow night',
    'CNS': 'central nervous system',
    'c.n.s.': 'to be taken tomorrow night',
    'CO': 'carbon monoxide',
    'CO2': 'carbon dioxide',
    'Co': 'cobalt',
    'c/o': 'complains of',
    'COLD': 'chronic obstructive lung disease',
    'comp.': 'compound',
    'COMT': 'catechol-O-methyltransferase',
    'COPD': 'chronic obstructive pulmonary disease',
    'COX-2': 'cyclooxygenase 2 inhibitors',
    'CP': 'cerebral palsy',
    'CPAP': 'continuous positive airway pressure',
    'CPC': 'clinicopathologic conference',
    'CPD': 'cephalopelvic disproportion',
    'CPHSS': 'Cincinnati Prehospital Stroke Scale',
    'CPK': 'creatine phosphokinase',
    'CPM': 'continuous passive motion',
    'CPR': 'cardiopulmonary resuscitation',
    'CR': 'conditioned reflex',
    'CREST': 'calcinosis, Raynaud phenomenon, esophageal dysfunction, sclerodactyly, telangiectasia (cluster of features of systemic sclerosis scleroderma)',
    'CRP': 'c. reactive protein',
    'CRS-R': 'Conners Rating Scales-Revised',
    'CS': 'cardiogenic shock',
    'CSF': 'cerebrospinal fluid',
    'CSH': 'combat support hospital',
    'CT': 'computerized tomography',
    'Cu': 'copper',
    'CV': 'cardiovascular',
    'CVA': 'cardiovascular accident',
    'CVC': 'central venous catheter',
    'CVP': 'central venous pressure',
    'CVRB': 'critical value read back',
    'CVS': 'chorionic villi sampling',
    'CXR': 'chest x-ray',
    '/d': 'per day',
    'D and C': 'dilatation and curettage',
    'dB': 'decibel',
    'DBP': 'diastolic blood pressure',
    'DC': 'direct current',
    'dc': 'discontinue',
    'Derm': 'dermatology',
    'det.': 'let it be given',
    'DEXA': 'dual-energy x-ray absorptiometry',
    'DFV': 'Doppler flow velocimetry',
    'DHT': 'dihydrotestosterone',
    'DI': 'diabetes insipidus',
    'DIC': 'disseminated intravascular coagulation',
    'dieb. alt.': 'every other day',
    'dieb. tert.': 'every third day',
    'dil.': 'dilute',
    'dim.': 'halved',
    'DISIDA (scan)': 'diisopropyl iminodiacetic acid (cholescintigraphy)',
    'DJD': 'degenerative joint disease',
    'DKA': 'diabetic ketoacidosis',
    'dL': 'deciliter',
    'DM': 'diabetes mellitus',
    'DMARD': 'disease-modulating antirheumatic drug',
    'DNA': 'deoxyribonucleic acid',
    'DNH': 'do not hospitalize',
    'DNR': 'do not resuscitate',
    'DOA': 'dead on arrival',
    'DOB': 'date of birth',
    'DOE': 'dyspnea on exertion',
    'DPat': 'diphtheria-acellular pertussis tetanus (vaccine)',
    'DPT': 'diphtheria-pertussis-tetanus (vaccine)',
    'dr.': 'dram',
    'DRE': 'digital rectal examination',
    'DRG': 'diagnosis-related group',
    'DSM-IV-TR': 'Diagnostic and Statistical Manual of Mental Disorders, 4th Edition, Text Revision',
    'DTR': 'deep tendon reflex(es)',
    'DTs': 'delirium tremens',
    'dur. dolor': 'while pain lasts',
    'DVT': 'deep vein thrombosis',
    'Dx': 'diagnosis',
    'DWI': 'driving while intoxicated',
    'EBV': 'Epstein-Barr virus',
    'ECF': 'extended care facility',
    'ECG': 'electrocardiogram',
    'ECHO': 'echocardiography',
    'ECMO': 'extracorporeal membrane oxygenation',
    'ECT': 'electroconvulsive therapy',
    'ED': 'emergency department',
    'EDD': 'estimated date of delivery',
    'EEG': 'electroencephalogram',
    'EENT': 'eye, ear, nose, and throat',
    'EF': 'ejection fraction',
    'EGD': 'esophagogastroduodenoscopy',
    'EIA': 'enzyme immunosorbent assay',
    'EKG': 'electrocardiogram',
    'ELISA': 'enzyme-linked immunosorbent assay',
    'elix.': 'elixir',
    'Em': 'emmetropia',
    'EMA-IgA': 'immunoglobulin A antiendomysial',
    'EMG': 'electromyogram',
    'EMS': 'emergency medical service',
    'Endo': 'endocrine',
    'ENT': 'ear, nose, and throat',
    'EOM': 'extraocular muscles',
    'EP': 'extrapyramidal',
    'EPS': 'extrapyramidal symptoms',
    'ER': 'Emergency Room',
    'ERCP': 'endoscopic retrograde cholangiopancreatography',
    'ESR': 'erythrocyte sedimentation rate',
    'ESRD': 'end-stage renal disease',
    'EST': 'electroshock therapy',
    'ESWL': 'extracorporeal shock wave lithotripsy',
    'ET-1': 'endothelin-1',
    'EtOH': 'ethyl alcohol',
    'ETOH': 'ethyl alcohol',
    'ext.': 'extensor',
    'F': 'Fahrenheit',
    'f': 'female',
    'FA': 'fatty acid',
    'F and E': 'fluid and electrolyte',
    'FAP': 'familial adenomatous polyposis',
    'FBS': 'fasting blood sugar',
    'FD': 'fatal dose',
    'FDA': 'Food and Drug Administration',
    'Fe': 'iron',
    'FEV': 'forced expiratory volume',
    'FFP': 'fresh frozen plasma',
    'FHT': 'fetal heart tone',
    'FISH': 'fluorescence in situ hybridization',
    'fl.': 'flexor',
    'Fld': 'fluid',
    'FP': 'family practice',
    'FSH': 'follicle stimulating hormone',
    'FTT': 'failure to thrive',
    'FUO': 'fever of unknown origin',
    'G': 'gram',
    'g': 'gram',
    'gm': 'gram',
    'GABA': 'gamma-aminobutyric acid',
    'GABAB': 'gamma-aminobutyric acid type B',
    'GABRB3': 'GABAA receptor gene',
    'garg': 'gargle',
    'GB': 'gallbladder',
    'GC': 'gonococcus or gonorrheal',
    'GDM': 'gestational diabetes mellitus',
    'GDS': 'Geriatric Depression Scale',
    'GERD': 'gastroesophageal reflux disease',
    'GFR': 'glomerular filtration rate',
    'GGT': 'gamma glutamyl transferase',
    'GH': 'growth hormone',
    'GI': 'gastrointestinal',
    'GnRH': 'gonadotropin releasing hormone',
    'GP': 'general practitioner',
    'G6PD': 'glucose 6 phosphate dehydrogenase',
    'gr': 'grain',
    'grad': 'by degrees',
    'GRAS': 'generally recognized as safe',
    'GSW': 'gunshot wound',
    'GTT': 'glucose tolerance test',
    'Gtt': 'drops',
    'gtt': 'drops',
    'GU': 'genitourinary',
    'guttat.': 'drop by drop',
    'GVHD': 'graft-versus-host disease',
    'GYN': 'gynecology',
    'H': 'hydrogen',
    'H+': 'hydrogen ion',
    'h': 'hour',
    'hr': 'hour',
    'H&H': 'hematocrit and hemoglobin',
    'H1N1': 'hemagglutinin type 1 and neuraminidase type 1',
    'H2': 'histamine 2',
    'HAART': 'highly active antiretroviral therapy',
    'HAV': 'hepatitis A virus',
    'HBV': 'hepatitis B virus',
    'HCG': 'human chorionic gonadotropin',
    'HCP': 'health care professional',
    # Was the fused key 'HCT, Hct', which can never equal a single word.
    'HCT': 'hematocrit',
    'Hct': 'hematocrit',
    'HCV': 'hepatitis C virus',
    'HD': 'hearing distance',
    'HDL': 'high-density lipoprotein',
    'HDV': 'hepatitis D',
    'HEENT': 'head, eye, ear, nose, and throat',
    'HELLP': 'hemolysis, elevated liver enzymes, low platelets',
    'HEPA': 'high-efficiency particulate air',
    'HER2': 'human EGF (epidermal growth factor) receptor 2',
    'HEV': 'hepatitis E',
    'HF': 'heart failure',
    'Hg': 'mercury',
    'hgb': 'hemoglobin',
    'HGSIL': 'high-grade squamous intraepithelial lesion',
    'Hib': 'Haemophilus influenzae type B',
    'HIDA': 'hepatobiliary iminodiacetic acid (cholescintigraphy)',
    'HIV': 'human immunodeficiency virus',
    'HLA': 'human leukocyte antigen',
    'h/o': 'history of',
    'HOB': 'head of bed',
    'H2O': 'water',
    'H2O2': 'hydrogen peroxide',
    'hor. decub.': 'bedtime',
    'hor. som': 'bedtime',
    'h.s.': 'bedtime',
    'HPI': 'history of present illness',
    'HPV': 'human papillomavirus',
    'HR': 'heart rate',
    'HRT': 'hormone replacement therapy',
    'HSIL': 'high grade squamous intraepithelial lesion',
    'HSV': 'herpes simplex virus',
    'HTN': 'hypertension',
    'hx': 'history',
    'Hx': 'history',
    'Hy': 'hyperopia',
    'Hz': 'hertz',
    'I': 'iodine',
    '131I': 'radioactive isotope of iodine',
    '132I': 'radioactive isotope of iodine',
    'I&O': 'intake and output',
    'IBW': 'ideal body weight',
    'IC': 'inspiratory capacity',
    'ICD': 'implantable cardioverter defibrillator',
    'ICP': 'intracranial pressure',
    'ICS': 'intercostal space',
    'ICSH': 'interstitial cell stimulating hormone',
    'ICU': 'intensive care unit',
    'Id.': 'the same',
    'IDDM': 'insulin-dependent diabetes mellitus',
    'IDM': 'infants of diabetic mothers',
    'IED': 'improvised explosive device',
    'Ig': 'immunoglobulin',
    'IgE': 'immunoglobulin E',
    'IgG': 'immunoglobulin G',
    'IL-1': 'interleukin 1',
    'IL-8': 'interleukin 8',
    'IM': 'intramuscular',
    'in d.': 'daily',
    'INF': 'interferon',
    'inf.': 'inferior',
    'inj.': 'injection',
    'INR': 'international normalized ratio',
    'instill.': 'instillation',
    'int.': 'internal',
    'IOP': 'intraocular pressure',
    'IPPB': 'intermittent positive pressure breathing',
    'IQ': 'intelligence quotient',
    'IRV': 'inspiratory reserve volume',
    'I.U.': 'international unit',
    'IUCD': 'intrauterine contraceptive device',
    'IUD': 'intrauterine device',
    'IUFD': 'intrauterine fetal death',
    'IV': 'intravenous',
    'IVP': 'intravenous pyelogram',
    'J': 'joule',
    'JNC 7': 'The Seventh Report of the Joint National Committee on Prevention, Detection, Evaluation, and Treatment of High Blood Pressure',
    'JRA': 'juvenile rheumatoid arthritis',
    'jt.': 'joint',
    'K': 'potassium',
    'kg': 'kilogram',
    'KI': 'potassium iodide',
    'KOH': 'potassium hydroxide',
    'KS': 'Kaposi sarcoma',
    'KUB': 'kidney, ureter, and bladder',
    'kv': 'kilovolt',
    'KVO': 'keep vein open',
    'L': 'liter',
    'L&D': 'labor and delivery',
    'lab': 'laboratory',
    'lat': 'lateral',
    'lb': 'pound',
    'LBW': 'low birth weight',
    'LD50': 'lethal dose',
    'LDH': 'lactate dehydrogenase',
    'LDL': 'low density lipoprotein',
    'LE': 'lower extremity',
    'LEEP': 'loop electrosurgical excision procedure',
    'LFT': 'liver function test',
    'LGA': 'large for gestational age',
    'LH': 'luteinizing hormone',
    'Li': 'lithium',
    'lig': 'ligament',
    'liq.': 'liquid',
    'LLE': 'left lower extremity',
    'LLL': 'left lower lobe',
    'LLQ': 'left lower quadrant',
    'lmp': 'last menstrual period',
    'LOC': 'level of consciousness',
    'LP': 'lumbar puncture',
    'LR': 'lactated Ringer',
    'LSIL': 'low grade squamous epithelial lesion',
    'LTD': 'lowest tolerated dose',
    'LUE': 'left upper extremity',
    'LUL': 'left upper lobe',
    'LUQ': 'left upper quadrant',
    'LV': 'left ventricle',
    'LVAD': 'left ventricular assist device',
    'LVH': 'left ventricular hypertrophy',
    'M': 'male',
    'MA': 'mental age',
    'MAO-B': 'monoamine oxidase B',
    'man. prim.': 'first thing in the morning',
    'MAP': 'mean arterial pressure',
    'MAT': 'Miller Analogies Test',
    'MBD': 'minimal brain dysfunction',
    'mc': 'millicurie',
    'mCi': 'millicurie',
    'mcg': 'microgram',
    'MCH': 'mean corpuscular hemoglobin',
    'MCHC': 'mean corpuscular hemoglobin concentration',
    'MCV': 'mean corpuscular volume',
    'MD': 'muscular dystrophy',
    'MDI': 'metered dose inhaler',
    'MED': 'minimum effective dose',
    'med': 'medial',
    'MELD': 'Model for End-Stage Liver Disease',
    'µEq': 'microequivalent',
    'mEq': 'milliequivalent',
    'mEq/L': 'milliequivalent per liter',
    'ME ratio': 'myeloid erythroid ratio',
    'MG': 'myasthenia gravis',
    'Mg': 'magnesium',
    'MgSO4': 'magnesium sulfate',
    'µg': 'microgram',
    'mg': 'milligram',
    'MI': 'myocardial infarction',
    'MID': 'minimum infective dose',
    'mist.': 'a mixture',
    'ml': 'milliliter',
    'MLD': 'minimum lethal dose',
    'MLF': 'medial longitudinal fasciculus',
    'MM': 'mucous membrane',
    'mm': 'millimeter',
    'mm Hg': 'millimeters of mercury',
    'mMol': 'millimole',
    'MMR': 'measles mumps rubella',
    'MMSE': 'Mini Mental Status Examination',
    'Mn': 'manganese',
    'mol wt': 'molecular weight',
    'mor. dict.': 'as directed',
    'mor. sol.': 'as accustomed',
    'MPC': 'maximum permitted concentration',
    'MPN': 'most probable number',
    'mr': 'milliroentgen',
    'MRA': 'magnetic resonance angiography',
    'MRgFUS': 'MR guided focused ultrasound surgery',
    'MRI': 'magnetic resonance imaging',
    'MS': 'mitral stenosis',
    'MV': 'mitral valve',
    'mV': 'millivolt',
    'MVA': 'motor vehicle accident',
    'MW': 'molecular weight',
    'N': 'nitrogen',
    'n': 'nerve',
    'N/A': 'not applicable',
    'Na': 'sodium',
    'NAA': 'nucleic acid amplification',
    'NAD': 'no acute distress',
    'n.b.': 'note well',
    'nCi': 'nanocurie',
    'NDC': 'National Drug Code',
    'ng': 'nasogastric',
    'NG': 'nasogastric',
    'NGT': 'nasogastric tube',
    'NH3': 'ammonia',
    'Ni': 'nickel',
    'NICU': 'neonatal intensive care unit',
    'NIDDM': 'noninsulin-dependent diabetes mellitus',
    'NIH': 'National Institutes of Health',
    'NK': 'natural killer',
    'NKA': 'no known allergies',
    'NMDA': 'N methyl D aspartate',
    'NMJ': 'neuromuscular junction',
    'NMS': 'neuroleptic malignant syndrome',
    'nn': 'nerves',
    'noct.': 'in the night',
    'noct. maneq.': 'night and morning',
    'n.r.': 'do not repeat',
    'non rep': 'do not repeat',
    'NPN': 'nonprotein nitrogen',
    'NPO': 'nothing by mouth',
    'n.p.o.': 'nothing by mouth',
    'NRC': 'normal retinal correspondence',
    'NS': 'normal saline',
    'NSAID': 'nonsteroidal anti inflammatory drug',
    'NSR': 'normal sinus rhythm',
    'N&V': 'nausea and vomiting',
    'N/V': 'nausea and vomiting',
    'OB': 'obstetrics',
    'OC': 'oral contraceptive',
    'OCD': 'obsessive compulsive disorder',
    'O.D.': 'right eye',
    'ol.': 'oil',
    'om. mane vel noc.': 'every morning or night',
    'omn. hor.': 'every hour',
    'omn. noct.': 'every night',
    'OmPC': 'outer membrane porin C',
    'OOB': 'out of bed',
    'OPD': 'outpatient department',
    'OR': 'operating room',
    'ORIF': 'open reduction with internal fixation',
    'O.S.': 'left eye',
    'OSHA': 'Occupational Safety and Health Administration',
    'OT': 'occupational therapy',
    'OTC': 'over the counter',
    'OU': 'each eye',
    'oz': 'ounce',
    'P2': 'pulmonic second sound',
    'PA': 'placenta abruption',
    'P-A': 'placenta abruption',
    'pa': 'placenta abruption',
    'PABA': 'para aminobenzoic acid',
    'PaCO2': 'partial pressure of carbon dioxide in alveolar gas',
    'PACU': 'postanesthesia care unit',
    'PAD': 'peripheral arterial disease',
    'PALS': 'pediatric advanced life support',
    'P-ANCA': 'perinuclear antineutrophil cytoplasmic antibody',
    'PAO2': 'alveolar oxygen partial pressure',
    'Pap test': 'Papanicolaou smear',
    'Pap': 'Papanicolaou smear',
    'part. vic': 'in divided doses',
    'Pb': 'lead',
    'PBI': 'protein bound iodine',
    'p.c.': 'after meals',
    'PCA': 'patient controlled analgesia',
    'PCO2': 'carbon dioxide pressure',
    'PCOS': 'polycystic ovarian syndrome',
    'PCP': 'Pneumocystis carinii pneumonia',
    'PCR': 'polymerase chain reaction',
    'PCWP': 'pulmonary capillary wedge pressure',
    'PD': 'Parkinson disease',
    'pd': 'pupillary distance',
    'PDA': 'patent ductus arteriosus',
    'PDR': 'Physicians Desk Reference',
    'PE': 'pulmonary embolism',
    'PEEP': 'positive end expiratory pressure',
    'PEFR': 'peak expiratory flow rate',
    'PEG': 'percutaneous endoscopic gastrostomy',
    'per': 'through or by',
    'PERRLA': 'pupils equal, regular, react to light and accommodation',
    'PET': 'positron emission tomography',
    'P4P': 'pay for performance',
    'PFP': 'pay for performance',
    'PFT': 'pulmonary function test',
    'pH': 'hydrogen ion concentration',
    'Phar.': 'pharmacy',
    'Pharm': 'pharmacy',
    'PICC': 'peripherally inserted central catheter',
    'PID': 'pelvic inflammatory disease',
    'PIH': 'pregnancy-induced hypertension',
    'pil.': 'pill',
    'PIP': 'proximal interphalangeal',
    'PIPDA': '99mTc para isopropylacetanilido iminodiaacetic acid',
    'PKU': 'phenylketonuria',
    'PM': 'afternoon',
    'PMH': 'past medical history',
    'PMI': 'point of maximal impulse',
    'PMN': 'polymorphonuclear neutrophil leukocytes',
    'PMS': 'premenstrual syndrome',
    'PND': 'paroxysmal nocturnal dyspnea',
    'PNH': 'paroxysmal nocturnal hemoglobinuria',
    'PNS': 'peripheral nervous system',
    'PO': 'orally',
    'p.o.': 'orally',
    'POLST': 'physician orders for life sustaining therapy',
    'post.': 'posterior',
    'PP': 'placenta previa',
    'PPD': 'purified protein derivative',
    'ppm': 'parts per million',
    'p.r.': 'through the rectum',
    'PRBCs': 'packed red blood cells',
    'p.r.n.': 'as needed',
    'pro time/PT': 'prothrombin time',
    'PSA': 'prostate specific antigen',
    # NOTE(review): 'PSV' carries the same expansion as 'PSA' — confirm this is intended.
    'PSV': 'prostate specific antigen',
    'PT': 'prothrombin time',
    'Pt': 'patient',
    'pt': 'pint',
    'PTT': 'partial thromboplastin time',
    'Pu': 'plutonium',
    'PUBS': 'percutaneous umbilical blood sampling',
    'PUVA': 'psoralen ultraviolet A',
    'p.v.': 'through the vagina',
    'PVC': 'premature ventricular contraction',
    'PVR': 'peripheral vascular resistance',
    'q': 'every',
    'q.d.': 'every day',
    'QFT-G': 'QuantiFERON TB Gold',
    'q.h.': 'every hour',
    'q.2h.': 'every 2 hours',
    'q.3h.': 'every 3 hours',
    'q.4h.': 'every 4 hours',
    'q.i.d.': 'four times a day',
    'q.l.': 'as much as wanted',
    'qns': 'quantity not sufficient',
    'q.o.d.': 'every other day',
    'q.p.': 'as much as desired',
    'q.s.': 'as much as needed',
    'qt': 'quart',
    'q.v.': 'as much as you please',
    'RA': 'rheumatoid arthritis',
    'Ra': 'radium',
    'rad': 'radiation absorbed dose',
    'RAI': 'radioactive iodine',
    'RAIU': 'radioactive iodine uptake',
    'RBC': 'red blood cell',
    'RD': 'Raynaud disease',
    'RDA': 'recommended daily',
    'RDS': 'respiratory distress syndrome',
    'RE': 'right eye',
    'Re': 'rhenium',
    'REM': 'rapid eye movement',
    'RF': 'rheumatoid factor',
    'RFT': 'renal function test',
    'Rh': 'rhesus factor',
    'RHD': 'rheumatic heart disease',
    'RLE': 'right lower extremity',
    'RLL': 'right lower lobe',
    'RLQ': 'right lower quadrant',
    'RML': 'right middle lobe of lung',
    'Rn': 'radon',
    'RNA': 'ribonucleic acid',
    'R/O': 'rule out',
    'ROM': 'range of motion',
    'ROS': 'review of systems',
    'RPM': 'revolutions per minute',
    'RQ': 'respiratory quotient',
    'RR': 'recovery room',
    'RSV': 'respiratory syncytial virus',
    'RT': 'radiation therapy; respiratory therapy',
    'R/T': 'related to',
    'RUE': 'right upper extremity',
    'RUL': 'right upper lobe',
    'RUQ': 'right upper quadrant',
    'S-A': 'sinoatrial',
    'S/A': 'sinoatrial',
    'SA': 'sinoatrial',
    'SAD': 'seasonal affective disorder',
    'SARS': 'severe acute respiratory syndrome',
    'SB': 'small bowel',
    'Sb': 'antimony',
    'SBP': 'systolic blood pressure',
    's.c.': 'subcutaneous',
    'sc': 'subcutaneous',
    'SC': 'subcutaneous',
    'SCI': 'spinal cord injury',
    'S.D.': 'standard deviation',
    'SDAT': 'senile dementia of the Alzheimer type',
    'S.E.': 'standard error',
    'Se': 'selenium',
    'Sed rate': 'sedimentation rate',
    'semih.': 'half an hour',
    'SERM': 'selective estrogen receptor modulator',
    'SGA': 'small for gestational age',
    'SI': 'international system of units',
    'Si': 'silicon',
    'SIADH': 'syndrome of inappropriate antidiuretic hormone',
    'SIDS': 'sudden infant death syndrome',
    'Sig.': 'write on label',
    'SJS': 'Stevens-Johnson syndrome',
    'SLE': 'systemic lupus erythematosus',
    'SLP': 'speech-language pathology',
    'Sn': 'tin',
    'SNF': 'skilled nursing facility',
    'SNRI': 'serotonin and norepinephrine reuptake inhibitor',
    'SNS': 'sympathetic nervous system',
    'SOB': 'shortness of breath',
    'sol': 'solution, dissolved',
    's.o.s.': 'if necessary',
    'S/P': 'no change after',
    'SPECT': 'single photon emission computed tomography',
    'sp gr': 'specific gravity',
    'SPF': 'skin protection factor',
    'sph': 'spherical',
    'spt.': 'spirit',
    's.q.': 'subcutaneous',
    'Sr': 'strontium',
    'ss': 'a half',
    'SSRI': 'selective serotonin reuptake inhibitor',
    'SSS': 'sick sinus syndrome',
    'Staph': 'Staphylococcus',
    'stat.': 'immediately',
    'STD': 'sexually transmitted disease',
    'Strep': 'Streptococcus',
    'STS': 'serologic test for syphilis',
    'STU': 'skin test unit',
    'sup.': 'superior',
    'supf.': 'superficial',
    'SV': 'stroke volume',
    'SVC': 'superior vena cava',
    'Sx': 'symptoms',
    'syr.': 'syrup',
    'T': 'temperature',
    'T3': 'triiodothyronine',
    'T4': 'tetraiodothyronine',
    'T6': 'thoracic nerve pair 6',
    'TA': 'toxin-antitoxin',
    'Ta': 'tantalum',
    'T&A': 'tonsillectomy and adenoidectomy',
    'TAH': 'total abdominal hysterectomy',
    'TAT': 'thematic apperception test',
    'T.A.T.': 'toxin-antitoxin',
    'TB': 'tuberculosis',
    'Tb': 'terbium',
    't.d.s.': 'to be taken three times daily',
    'Te': 'tellurium',
    'TEE': 'transesophageal echocardiogram',
    'TEN': 'toxic epidermal necrolysis',
    'TENS': 'transcutaneous electrical nerve stimulation',
    'TG': 'thyroglobulin',
    'Th': 'thorium',
    'THR': 'total hip replacement',
    'TIA': 'transient ischemic attack',
    'TIBC': 'total iron-binding capacity',
    't.i.d.': 'three times a day',
    't.i.n.': 'three times a night',
    'tinct.': 'tincture',
    'TKR': 'total knee replacement',
    'Tl': 'thallium',
    'TLC': 'total lung capacity',
    'tlc': 'total lung capacity',
    'TM': 'tympanic membrane',
    'TMJ': 'temporomandibular joint',
    'TN': 'trigeminal nerve',
    'TNF': 'tumor necrosis factor',
    'TNF-I': 'tumor necrosis factor inhibitor',
    'TNF-α': 'tumor necrosis factor alpha',
    'TNM': 'tumor-node-metastasis',
    'TNT': 'trinitrotoluene',
    'TNTM': 'too numerous to mention',
    'top.': 'topically',
    'TORB': 'telephone order read back',
    'TPI': 'Treponema pallidum immobilization test for syphilis',
    'TPN': 'total parenteral nutrition',
    'TPO': 'thyroid peroxidase',
    'TPR': 'temperature, pulse, and respiration',
    'tr': 'tincture',
    'TRAP criteria': 'tremor, rigidity, akinesia or bradykinesia, and postural instability',
    'Treg': 'regulatory T cell',
    'trit.': 'triturate, grind',
    'TSD': 'time since death',
    'TSE': 'testicular self-examination',
    'TSH': 'thyroid stimulating hormone',
    'tTG': 'antitransglutaminase',
    'TUMA': 'transurethral microwave antenna',
    'TUR': 'transurethral resection',
    'TURP': 'transurethral resection of the prostate',
    'Tx': 'treatment',
    'UA': 'urinalysis',
    'UC': 'ulcerative colitis',
    'UE': 'upper extremity',
    'UHF': 'ultrahigh frequency',
    'ult. praes.': 'the last ordered',
    'umb': 'umbilicus',
    'ung.': 'ointment',
    'URI': 'upper respiratory infection',
    'USAN': 'United States Adopted Name',
    'USP': 'United States Pharmacopeia',
    'ut. dict.': 'as directed',
    'UTI': 'urinary tract infection',
    'UV': 'ultraviolet',
    'VA': 'visual acuity',
    'VC': 'vital capacity',
    'VD': 'venereal disease',
    'VDRL': 'Venereal Disease Research Laboratories',
    'VF': 'ventricular fibrillation',
    'Vf': 'field of vision',
    'VLBW': 'very low birth weight',
    'VLDL': 'very low density lipoprotein',
    'VMA': 'vanillylmandelic acid',
    'VOE': 'VistA Office Electronic Health Record',
    'vol.': 'volume',
    'vol %': 'volume percent',
    'VORB': 'verbal order read back',
    'V/Q': 'ventilation',
    'VS': 'volumetric solution',
    'VSD': 'ventricular septal defect',
    'VT': 'ventricular tachycardia',
    'vv': 'veins',
    'VZIG': 'varicella zoster immune globulin',
    'W': 'tungsten',
    'w': 'watt',
    'WAIS': 'Wechsler Adult Intelligence Scale',
    'WAP': 'written action plan',
    'WBC': 'white blood cell',
    'WDWN': 'well developed',
    'WF': 'white female',
    'BF': 'black female',
    'WH': 'well hydrated',
    'WM': 'white male',
    'WN': 'well nourished',
    'WNL': 'within normal limits',
    'wt.': 'weight',
    'w/v.': 'weight in volume',
    'x': 'multiplied by',
    'y': 'yocto',
    'yo': 'years old',
    'yr': 'year',
    'Z': 'atomic number',
    'Zn': 'zinc',
}
from os import listdir
from os.path import isfile, join
# Names of the entries directly inside one_line_txt/ that are regular files
# (anything isfile() rejects, such as sub-directories, is skipped).
onlyfiles = [
    entry
    for entry in listdir('one_line_txt/')
    if isfile(join('one_line_txt/', entry))
]
import sys, re
import nltk
from nltk import word_tokenize, sent_tokenize
from nltk.stem import PorterStemmer
from nltk.corpus import stopwords
from nltk.stem import LancasterStemmer, WordNetLemmatizer
import multiprocessing
# Domain-specific stop words removed in addition to NLTK's English list.
_MED_REMOVE = [
    'a', 'about', 'all', 'almost', 'also', 'although', 'always', 'among', 'an', 'and', 'another', 'any', 'are', 'as', 'at',
    'be', 'because', 'been', 'being', 'between', 'both', 'but', 'by', 'can', 'could', 'did', 'do', 'does', 'done', 'due',
    'each', 'either', 'enough', 'especially', 'etc', 'for', 'found', 'from', 'further', 'had', 'has', 'have', 'having', 'here', 'how', 'however',
    'i', 'if', 'in', 'into', 'is', 'it', 'its', 'itself', 'just', 'kg', 'km', 'made', 'mainly', 'make', 'may', 'mg', 'might', 'ml', 'mm', 'most', 'mostly', 'must',
    'nearly', 'neither', 'nor', 'obtained', 'of', 'often', 'on', 'our', 'overall', 'perhaps', 'pmid', 'quite', 'rather', 'really', 'regarding',
    'seem', 'seen', 'several', 'should', 'show', 'showed', 'shown', 'shows', 'significantly', 'since', 'so', 'some', 'such',
    'than', 'that', 'the', 'their', 'theirs', 'them', 'then', 'there', 'therefore', 'these', 'they', 'this', 'those', 'through', 'thus', 'to',
    'upon', 'use', 'used', 'using', 'various', 'very', 'was', 'we', 'were', 'what', 'when', 'which', 'while', 'with', 'within', 'without', 'would',
]

# Words that are never treated as stop words, even though they appear in the
# combined stop-word list.
_KEEP_WORDS = {'after', 'again', 'before', 'no', 'during', 'not'}


def _expand_abbreviations(text):
    """Replace every whitespace-separated word that is a key of ``abb``.

    The match is exact and case-sensitive; words that are not keys are kept
    unchanged.  The result is re-joined with single spaces.
    """
    return ' '.join(abb.get(word, word) for word in text.split())


def files(name):
    """Pre-process one document and write the cleaned text to ``new_m/``.

    Reads ``one_line_txt/<name>``, then:

    1. replaces every character that is not an ASCII letter with a space,
    2. expands the words found in the ``abb`` abbreviation table,
    3. tokenises with NLTK, drops punctuation-only tokens and lower-cases,
    4. removes stop words (NLTK English list plus ``_MED_REMOVE``, minus
       ``_KEEP_WORDS``),
    5. lemmatises each remaining word with ``WordNetLemmatizer``.

    The result is written as a single space-separated line to
    ``new_m/<name>.txt``; ``.txt`` is appended even when *name* already has
    an extension.

    Args:
        name: File name (relative to ``one_line_txt/``) of the document.

    Returns:
        None.  The function is used for its side effect of writing a file.

    Raises:
        OSError: If the input cannot be read or the output cannot be written.
    """
    import string

    # ``with`` guarantees the handle is closed even if reading fails.
    with open('one_line_txt/' + str(name), 'r') as source:
        raw = source.read()

    # Digits are non-letters too, so one substitution covers both of the
    # original passes.  Only ASCII letters and spaces remain afterwards.
    letters_only = re.sub(r'[^a-zA-Z]', ' ', raw)

    # Expansion happens before lower-casing because the table is
    # case-sensitive.
    expanded = _expand_abbreviations(letters_only)

    # Expansions may contain hyphens; glue a hyphen to whatever follows it.
    dehyphenated = re.sub(r"-\s", "-", expanded)

    # Escaping the punctuation set keeps ']' , '\\' and '^' literal inside
    # the character class.
    punctuation_only = re.compile('[' + re.escape(string.punctuation) + ']+')
    words = [
        token.lower()
        for token in nltk.word_tokenize(dehyphenated)
        if not punctuation_only.fullmatch(token)
    ]

    stop_words = set(stopwords.words('english') + _MED_REMOVE) - _KEEP_WORDS
    lemmatizer = WordNetLemmatizer()
    lemmatized = ' '.join(
        lemmatizer.lemmatize(word) for word in words if word not in stop_words
    )

    # Re-attach tokens that start with an apostrophe to the preceding word.
    final = re.sub(r"\s'", "'", lemmatized).rstrip()

    with open("new_m/" + str(name) + '.txt', "w") as target:
        target.write(final)
# Pre-process every input file in parallel on 4 worker processes (CPU
# processes — nothing here runs on a GPU).
#
# The __main__ guard is required by multiprocessing: under the "spawn" start
# method each worker re-imports this module, and without the guard every
# worker would try to create its own pool.
if __name__ == '__main__':
    pool = multiprocessing.Pool(processes=4)
    r = pool.map(files, onlyfiles)
    pool.close()
    # Wait for the workers to exit cleanly before the interpreter shuts down.
    pool.join()